From 77cd67be4fa8f94b750dc71181eeab2d965b8fa1 Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Thu, 18 May 2023 13:35:54 +0100 Subject: [PATCH] Anopheles refactor part 5 - AnophelesSnpData (#393) * start refactoring snp data functions * fix typing * comments * fix logic error * comments * site_filters * snp_sites * fix errors * snp_genotypes, is_accessible * snp_variants * site_annotations, snp_calls * snp_allele_counts; plot_snps * fix typing * fix regression * fix bug * wip sample_indices * parameter validation * parameter validation * add typeguard to dependencies * use typeguard, simplify types * fix typing bug * fix bugs * add more typechecked annotations; tighten up region parsing * fix typing bug * further typing improvements * fix typing bug * more type hints * fix typing * fix typing * tweaks * fix typing * fix typing * fix bokeh type * check notebooks * disable typeguard because leaking memory * fix test failures * strip typeguard annotations * fix typing errors * fix typing error * squash bugs * add typeguard to fast tests on ci * squash bugs * fix snafu * ignore notebooks output * home-rolled type check decorator * improve message * depend on typeguard * wip simulate genotypes * wip simulate genotypes * wip simulate genotypes * wip test_snp_data * wip test_snp_data * wip test_snp_data - open_snp_sites * wip test_snp_data * wip refactor tests * squashed commits * update poetry * deal with runs of Ns better in plot_snps --- .github/workflows/tests.yml | 5 +- .gitignore | 3 +- malariagen_data/af1.py | 4 +- malariagen_data/ag3.py | 78 +- malariagen_data/anoph/base.py | 158 ++- malariagen_data/anoph/genome_features.py | 41 +- malariagen_data/anoph/genome_sequence.py | 14 +- malariagen_data/anoph/sample_metadata.py | 95 +- malariagen_data/anoph/snp_data.py | 1293 ++++++++++++++++++ malariagen_data/anopheles.py | 1514 ++-------------------- malariagen_data/util.py | 186 ++- notebooks/plot_snps.ipynb | 121 +- notebooks/repr.ipynb | 832 +----------- notebooks/spike_sim_test_data.ipynb | 993 ++++++++++++++ poetry.lock | 1362 +++++++++---------- pyproject.toml | 1 + tests/anoph/conftest.py | 446 ++++++- tests/anoph/test_base.py | 5 +- tests/anoph/test_genome_sequence.py | 4 +- tests/anoph/test_sample_metadata.py | 6 +- tests/anoph/test_snp_data.py | 763 +++++++++++ tests/test_af1.py | 364 +----- tests/test_ag3.py | 336 +---- tests/test_anopheles.py | 85 +- 24 files changed, 4789 insertions(+), 3920 deletions(-) create mode 100644 malariagen_data/anoph/snp_data.py create mode 100644 notebooks/spike_sim_test_data.ipynb create mode 100644 tests/anoph/test_snp_data.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5ceca91d9..9a2eaa7ed 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -29,9 +29,10 @@ jobs: - name: Install dependencies run: poetry install - # Run a subset of tests first which run quickly to fail fast. + # Run a subset of tests first which run quickly without accessing + # any remote data in order to fail fast where possible. 
- name: Run fast unit tests - run: poetry run pytest -v tests/anoph + run: poetry run pytest -v tests/anoph --typeguard-packages=malariagen_data,malariagen_data.anoph - name: Restore GCS cache uses: actions/cache/restore@v3 diff --git a/.gitignore b/.gitignore index ac19c1bd9..038958d2a 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,5 @@ tests/anoph/fixture/simulated *~ # nbconvert outputs -*.nbconvert.ipynb +notebooks/*.nbconvert.ipynb +notebooks/*.html diff --git a/malariagen_data/af1.py b/malariagen_data/af1.py index 24d4ee571..c991a3889 100644 --- a/malariagen_data/af1.py +++ b/malariagen_data/af1.py @@ -18,7 +18,6 @@ G123_CALIBRATION_CACHE_NAME = "af1_g123_calibration_v1" H1X_GWSS_CACHE_NAME = "af1_h1x_gwss_v1" IHS_GWSS_CACHE_NAME = "af1_ihs_gwss_v1" -DEFAULT_SITE_MASK = "funestus" class Af1(AnophelesDataResource): @@ -83,8 +82,6 @@ class Af1(AnophelesDataResource): _g123_calibration_cache_name = G123_CALIBRATION_CACHE_NAME _h1x_gwss_cache_name = H1X_GWSS_CACHE_NAME _ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME - site_mask_ids = ("funestus",) - _default_site_mask = DEFAULT_SITE_MASK phasing_analysis_ids = ("funestus",) _default_phasing_analysis = "funestus" @@ -109,6 +106,7 @@ def __init__( aim_analysis=None, aim_metadata_dtype=None, site_filters_analysis=site_filters_analysis, + default_site_mask="funestus", bokeh_output_notebook=bokeh_output_notebook, results_cache=results_cache, log=log, diff --git a/malariagen_data/ag3.py b/malariagen_data/ag3.py index fd2edd561..6e07766a6 100644 --- a/malariagen_data/ag3.py +++ b/malariagen_data/ag3.py @@ -2,6 +2,7 @@ import warnings from bisect import bisect_left, bisect_right from textwrap import dedent +from typing import List import dask import dask.array as da @@ -13,6 +14,7 @@ import malariagen_data # used for .__version__ +from .anoph.base import base_params from .anopheles import AnophelesDataResource, gplt_params from .util import ( DIM_SAMPLE, @@ -20,6 +22,9 @@ Region, da_from_zarr, init_zarr_store, + parse_multi_region, + parse_single_region, + region_str, simple_xarray_concat, ) @@ -43,7 +48,6 @@ G123_GWSS_CACHE_NAME = "ag3_g123_gwss_v1" H1X_GWSS_CACHE_NAME = "ag3_h1x_gwss_v1" IHS_GWSS_CACHE_NAME = "ag3_ihs_gwss_v1" -DEFAULT_SITE_MASK = "gamb_colu_arab" class Ag3(AnophelesDataResource): @@ -113,8 +117,6 @@ class Ag3(AnophelesDataResource): _g123_calibration_cache_name = G123_CALIBRATION_CACHE_NAME _h1x_gwss_cache_name = H1X_GWSS_CACHE_NAME _ihs_gwss_cache_name = IHS_GWSS_CACHE_NAME - site_mask_ids = ("gamb_colu_arab", "gamb_colu", "arab") - _default_site_mask = DEFAULT_SITE_MASK phasing_analysis_ids = ("gamb_colu_arab", "gamb_colu", "arab") _default_phasing_analysis = "gamb_colu_arab" @@ -147,6 +149,7 @@ def __init__( "aim_species": object, }, site_filters_analysis=site_filters_analysis, + default_site_mask="gamb_colu_arab", bokeh_output_notebook=bokeh_output_notebook, results_cache=results_cache, log=log, @@ -675,7 +678,7 @@ def _cnv_hmm_dataset(self, *, contig, sample_set, inline_array, chunks): def cnv_hmm( self, - region, + region: base_params.region, sample_sets=None, sample_query=None, max_coverage_variance=DEFAULT_MAX_COVERAGE_VARIANCE, @@ -717,13 +720,12 @@ def cnv_hmm( debug("normalise parameters") sample_sets = self._prep_sample_sets_param(sample_sets=sample_sets) - region = self.resolve_region(region) - if isinstance(region, Region): - region = [region] + regions: List[Region] = parse_multi_region(self, region) + del region debug("access CNV HMM data and concatenate as needed") lx = [] - for r in region: + for r in 
regions: ly = [] for s in sample_sets: y = self._cnv_hmm_dataset( @@ -898,7 +900,7 @@ def _cnv_coverage_calls_dataset( def cnv_coverage_calls( self, - region, + region: base_params.region, sample_set, analysis, inline_array=True, @@ -937,13 +939,12 @@ def cnv_coverage_calls( # calling is done independently in different sample sets. debug("normalise parameters") - region = self.resolve_region(region) - if isinstance(region, Region): - region = [region] + regions: List[Region] = parse_multi_region(self, region) + del region debug("access data and concatenate as needed") lx = [] - for r in region: + for r in regions: debug("obtain coverage calls for the contig") x = self._cnv_coverage_calls_dataset( contig=r.contig, @@ -1131,7 +1132,7 @@ def cnv_discordant_read_calls( def gene_cnv( self, - region, + region: base_params.region, sample_sets=None, sample_query=None, max_coverage_variance=DEFAULT_MAX_COVERAGE_VARIANCE, @@ -1163,9 +1164,8 @@ def gene_cnv( """ - region = self.resolve_region(region) - if isinstance(region, Region): - region = [region] + regions: List[Region] = parse_multi_region(self, region) + del region ds = simple_xarray_concat( [ @@ -1175,7 +1175,7 @@ def gene_cnv( sample_query=sample_query, max_coverage_variance=max_coverage_variance, ) - for r in region + for r in regions ], dim="genes", ) @@ -1267,7 +1267,7 @@ def _gene_cnv(self, *, region, sample_sets, sample_query, max_coverage_variance) def gene_cnv_frequencies( self, - region, + region: base_params.region, cohorts, sample_query=None, min_cohort_size=10, @@ -1318,9 +1318,8 @@ def gene_cnv_frequencies( debug("check and normalise parameters") self._check_param_min_cohort_size(min_cohort_size) - region = self.resolve_region(region) - if isinstance(region, Region): - region = [region] + regions: List[Region] = parse_multi_region(self, region) + del region debug("access and concatenate data from regions") df = pd.concat( @@ -1334,13 +1333,13 @@ def gene_cnv_frequencies( drop_invariant=drop_invariant, max_coverage_variance=max_coverage_variance, ) - for r in region + for r in regions ], axis=0, ) debug("add metadata") - title = f"Gene CNV frequencies ({self._region_str(region)})" + title = f"Gene CNV frequencies ({region_str(regions)})" df.attrs["title"] = title return df @@ -1490,7 +1489,7 @@ def _gene_cnv_frequencies( def gene_cnv_frequencies_advanced( self, - region, + region: base_params.region, area_by, period_by, sample_sets=None, @@ -1553,9 +1552,8 @@ def gene_cnv_frequencies_advanced( self._check_param_min_cohort_size(min_cohort_size) - region = self.resolve_region(region) - if isinstance(region, Region): - region = [region] + regions: List[Region] = parse_multi_region(self, region) + del region ds = simple_xarray_concat( [ @@ -1571,12 +1569,12 @@ def gene_cnv_frequencies_advanced( max_coverage_variance=max_coverage_variance, ci_method=ci_method, ) - for r in region + for r in regions ], dim="variants", ) - title = f"Gene CNV frequencies ({self._region_str(region)})" + title = f"Gene CNV frequencies ({region_str(regions)})" ds.attrs["title"] = title return ds @@ -1740,7 +1738,7 @@ def _gene_cnv_frequencies_advanced( def plot_cnv_hmm_coverage_track( self, sample, - region, + region: base_params.single_region, sample_set=None, y_max="auto", sizing_mode=gplt_params.sizing_mode_default, @@ -1777,7 +1775,7 @@ def plot_cnv_hmm_coverage_track( Passed through to bokeh line() function. show : bool, optional If true, show the plot. 
- x_range : bokeh.models.Range1d, optional + x_range : bokeh.models.Range, optional X axis range (for linking to other tracks). Returns @@ -1792,7 +1790,8 @@ def plot_cnv_hmm_coverage_track( import bokeh.plotting as bkplt debug("resolve region") - region = self.resolve_region(region) + region_prepped: Region = parse_single_region(self, region) + del region debug("access sample metadata, look up sample") sample_rec = self._lookup_sample(sample=sample, sample_set=sample_set) @@ -1801,7 +1800,7 @@ def plot_cnv_hmm_coverage_track( debug("access HMM data") hmm = self.cnv_hmm( - region=region, sample_sets=sample_set, max_coverage_variance=None + region=region_prepped, sample_sets=sample_set, max_coverage_variance=None ) debug("select data for the given sample") @@ -1863,7 +1862,7 @@ def plot_cnv_hmm_coverage_track( debug("tidy up the plot") fig.yaxis.axis_label = "Copy number" fig.yaxis.ticker = list(range(y_max + 1)) - self._bokeh_style_genome_xaxis(fig, region.contig) + self._bokeh_style_genome_xaxis(fig, region_prepped.contig) fig.add_layout(fig.legend[0], "right") if show: @@ -1967,7 +1966,7 @@ def plot_cnv_hmm_coverage( def plot_cnv_hmm_heatmap_track( self, - region, + region: base_params.single_region, sample_sets=None, sample_query=None, max_coverage_variance=DEFAULT_MAX_COVERAGE_VARIANCE, @@ -2017,11 +2016,12 @@ def plot_cnv_hmm_heatmap_track( import bokeh.palettes as bkpal import bokeh.plotting as bkplt - region = self.resolve_region(region) + region_prepped: Region = parse_single_region(self, region) + del region debug("access HMM data") ds_cnv = self.cnv_hmm( - region=region, + region=region_prepped, sample_sets=sample_sets, sample_query=sample_query, max_coverage_variance=max_coverage_variance, @@ -2105,7 +2105,7 @@ def plot_cnv_hmm_heatmap_track( debug("tidy") fig.yaxis.axis_label = "Samples" - self._bokeh_style_genome_xaxis(fig, region.contig) + self._bokeh_style_genome_xaxis(fig, region_prepped.contig) fig.yaxis.ticker = bkmod.FixedTicker( ticks=np.arange(len(sample_id)), ) diff --git a/malariagen_data/anoph/base.py b/malariagen_data/anoph/base.py index c2fa1ae6f..9a3605f2b 100644 --- a/malariagen_data/anoph/base.py +++ b/malariagen_data/anoph/base.py @@ -1,6 +1,8 @@ import json +from pathlib import Path from typing import ( IO, + Any, Dict, Final, Iterable, @@ -14,11 +16,23 @@ ) import bokeh.io +import numpy as np import pandas as pd from numpydoc_decorator import doc +from tqdm.auto import tqdm +from tqdm.dask import TqdmCallback from typing_extensions import Annotated, TypeAlias -from ..util import LoggingHelper, Region, check_colab_location, init_filesystem +from ..util import ( + CacheMiss, + LoggingHelper, + check_colab_location, + check_types, + hash_params, + init_filesystem, + region_param_type, + single_region_param_type, +) DEFAULT: Final[str] = "default" @@ -33,22 +47,36 @@ class base_params: names. """, ] - region: TypeAlias = Annotated[ - Union[str, Region], + + single_region: TypeAlias = Annotated[ + single_region_param_type, """ Region of the reference genome. Can be a contig name, region string (formatted like "{contig}:{start}-{end}"), or identifier of a genome feature such as a gene or transcript. """, ] + + region: TypeAlias = Annotated[ + region_param_type, + """ + Region of the reference genome. Can be a contig name, region string + (formatted like "{contig}:{start}-{end}"), or identifier of a genome + feature such as a gene or transcript. Can also be a sequence (e.g., list) + of regions. 
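+ For example (illustrative values): "3R", "3R:28,000,000-29,000,000",
+ a gene ID such as "AGAP004707", or a list combining any of these.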
+ """, + ] + release: TypeAlias = Annotated[ Union[str, Sequence[str]], "Release version identifier.", ] + sample_set: TypeAlias = Annotated[ str, "Sample set identifier.", ] + sample_sets: TypeAlias = Annotated[ Union[Sequence[str], str], """ @@ -56,12 +84,36 @@ class base_params: release. """, ] + sample_query: TypeAlias = Annotated[ str, """ - A pandas query string to be evaluated against the sample metadata. + A pandas query string to be evaluated against the sample metadata, to + select samples to be included in the returned data. """, ] + + sample_indices: TypeAlias = Annotated[ + List[int], + """ + Advanced usage parameter. A list of indices of samples to select, + corresponding to the order in which the samples are found within the + sample metadata. Either provide this parameter or sample_query, not + both. + """, + ] + + @staticmethod + def validate_sample_selection_params( + *, + sample_query: Optional[sample_query], + sample_indices: Optional[sample_indices], + ): + if sample_query is not None and sample_indices is not None: + raise ValueError( + "Please provide either sample_query or sample_indices, not both." + ) + cohort1_query: TypeAlias = Annotated[ str, """ @@ -69,6 +121,7 @@ class base_params: to select samples for the first cohort. """, ] + cohort2_query: TypeAlias = Annotated[ str, """ @@ -76,6 +129,7 @@ class base_params: to select samples for the second cohort. """, ] + site_mask: TypeAlias = Annotated[ str, """ @@ -83,6 +137,7 @@ class base_params: available values. """, ] + site_class: TypeAlias = Annotated[ str, """ @@ -97,6 +152,7 @@ class base_params: a gene). """, ] + cohort_size: TypeAlias = Annotated[ int, """ @@ -105,6 +161,7 @@ class base_params: than this value. """, ] + min_cohort_size: TypeAlias = Annotated[ int, """ @@ -112,6 +169,7 @@ class base_params: less than this value. """, ] + max_cohort_size: TypeAlias = Annotated[ int, """ @@ -119,14 +177,17 @@ class base_params: cohort is greater. """, ] + random_seed: TypeAlias = Annotated[ int, "Random seed used for reproducible down-sampling.", ] + transcript: TypeAlias = Annotated[ str, "Gene transcript identifier.", ] + cohort: TypeAlias = Annotated[ Union[str, Tuple[str, str]], """ @@ -134,6 +195,7 @@ class base_params: pair of strings giving a custom cohort label and a sample query. """, ] + cohorts: TypeAlias = Annotated[ Union[str, Mapping[str, str]], """ @@ -142,6 +204,7 @@ class base_params: queries. """, ] + n_jack: TypeAlias = Annotated[ int, """ @@ -150,6 +213,7 @@ class base_params: better. """, ] + confidence_level: TypeAlias = Annotated[ float, """ @@ -157,20 +221,27 @@ class base_params: means 95% confidence interval. """, ] + field: TypeAlias = Annotated[str, "Name of array or column to access."] + inline_array: TypeAlias = Annotated[ bool, "Passed through to dask `from_array()`.", ] + inline_array_default: inline_array = True + chunks: TypeAlias = Annotated[ - str, + Union[str, Tuple[int, ...]], """ If 'auto' let dask decide chunk size. If 'native' use native zarr - chunks. Also, can be a target size, e.g., '200 MiB'. + chunks. Also, can be a target size, e.g., '200 MiB', or a tuple of + integers. 
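+ For example (illustrative values): chunks="auto", chunks="200 MiB",
+ or chunks=(65536, 64).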
""", ] + chunks_default: chunks = "native" + gff_attributes: TypeAlias = Annotated[ Optional[Union[Sequence[str], str]], """ @@ -187,7 +258,7 @@ def __init__( url: str, config_path: str, pre: bool, - gcs_url: str, + gcs_url: Optional[str], # only used for colab location check major_version_number: int, major_version_path: str, bokeh_output_notebook: bool = False, @@ -196,6 +267,7 @@ def __init__( show_progress: bool = False, check_location: bool = False, storage_options: Optional[Mapping] = None, + results_cache: Optional[str] = None, ): self._url = url self._config_path = config_path @@ -227,7 +299,7 @@ def __init__( bokeh.io.output_notebook(hide_banner=True) # Check colab location is in the US. - if check_location: + if check_location and self._gcs_url is not None: self._client_details = check_colab_location( gcs_url=self._gcs_url, url=self._url ) @@ -240,15 +312,31 @@ def __init__( self._cache_sample_set_to_release: Optional[Dict[str, str]] = None self._cache_files: Dict[str, bytes] = dict() + # Set up results cache directory path. + self._results_cache: Optional[Path] = None + if results_cache is not None: + self._results_cache = Path(results_cache).expanduser().resolve() + + def _progress(self, iterable, **kwargs): + # progress doesn't mix well with debug logging + disable = self._debug or not self._show_progress + return tqdm(iterable, disable=disable, **kwargs) + + def _dask_progress(self, **kwargs): + disable = not self._show_progress + return TqdmCallback(disable=disable, **kwargs) + + @check_types def open_file(self, path: str) -> IO: full_path = f"{self._base_path}/{path}" return self._fs.open(full_path) + @check_types def read_files( self, paths: Iterable[str], on_error: Literal["raise", "omit", "return"] = "return", - ) -> Mapping[str, bytes]: + ) -> Mapping[str, Union[bytes, Exception]]: # Check for any cached files. files = { path: data for path, data in self._cache_files.items() if path in paths @@ -399,6 +487,7 @@ def _read_sample_sets(self, *, single_release: str): df["release"] = single_release return df + @check_types @doc( summary="Access a dataframe of sample sets", returns="A dataframe of sample sets, one row per sample set.", @@ -442,6 +531,7 @@ def sample_sets( # Return copy to ensure cached dataframes aren't modified by user. return df.copy() + @check_types @doc( summary="Find which release a sample set was included in.", ) @@ -479,8 +569,9 @@ def _prep_sample_sets_param( # Single sample set, normalise to always return a list. prepped_sample_sets = [sample_sets] - elif isinstance(sample_sets, Sequence): - # List or tuple of sample sets or releases. + else: + # Sequence of sample sets or releases. + assert isinstance(sample_sets, Sequence) prepped_sample_sets = [] for s in sample_sets: # Make a recursive call to handle the case where s is a release identifier. @@ -489,11 +580,6 @@ def _prep_sample_sets_param( # Make sure we end up with a flat list of sample sets. prepped_sample_sets.extend(sp) - else: - raise TypeError( - f"Invalid type for sample_sets parameter; expected str, list or tuple; found: {sample_sets!r}" - ) - # Ensure all sample sets selected at most once. prepped_sample_sets = sorted(set(prepped_sample_sets)) @@ -503,3 +589,43 @@ def _prep_sample_sets_param( raise ValueError(f"Sample set {s!r} not found.") return prepped_sample_sets + + def _results_cache_add_analysis_params(self, params: dict): + # Expect sub-classes will override to add any analysis parameters. 
+ pass + + @check_types + def results_cache_get( + self, *, name: str, params: Dict[str, Any] + ) -> Mapping[str, np.ndarray]: + name = type(self).__name__.lower() + "_" + name + if self._results_cache is None: + raise CacheMiss + params = params.copy() + self._results_cache_add_analysis_params(params) + cache_key, _ = hash_params(params) + cache_path = self._results_cache / name / cache_key + results_path = cache_path / "results.npz" + if not results_path.exists(): + raise CacheMiss + results = np.load(results_path) + # TODO Do we need to read the arrays and then close the npz file? + return results + + @check_types + def results_cache_set( + self, *, name: str, params: Dict[str, Any], results: Mapping[str, np.ndarray] + ): + name = type(self).__name__.lower() + "_" + name + if self._results_cache is None: + return + params = params.copy() + self._results_cache_add_analysis_params(params) + cache_key, params_json = hash_params(params) + cache_path = self._results_cache / name / cache_key + cache_path.mkdir(exist_ok=True, parents=True) + params_path = cache_path / "params.json" + results_path = cache_path / "results.npz" + with params_path.open(mode="w") as f: + f.write(params_json) + np.savez_compressed(results_path, **results) diff --git a/malariagen_data/anoph/genome_features.py b/malariagen_data/anoph/genome_features.py index c69a4188c..44d083f1e 100644 --- a/malariagen_data/anoph/genome_features.py +++ b/malariagen_data/anoph/genome_features.py @@ -8,7 +8,14 @@ from pandas.io.common import infer_compression from typing_extensions import Annotated, TypeAlias -from ..util import Region, read_gff3, resolve_region, unpack_gff3_attributes +from ..util import ( + Region, + check_types, + parse_multi_region, + parse_single_region, + read_gff3, + unpack_gff3_attributes, +) from .base import DEFAULT, base_params from .genome_sequence import AnophelesGenomeSequenceData @@ -61,7 +68,7 @@ class gplt_params: ] toolbar_location_default: toolbar_location = "above" x_range: TypeAlias = Annotated[ - bokeh.models.Range1d, + bokeh.models.Range, "X axis range (for linking to other tracks).", ] title: TypeAlias = Annotated[ @@ -69,7 +76,10 @@ class gplt_params: "Plot title. If True, a title may be automatically generated.", ] figure: TypeAlias = Annotated[ - bokeh.plotting.Figure, + # Use quite a broad type here to accommodate both single-panel figures + # created via bokeh.plotting and multi-panel figures created via + # bokeh.layouts. 
+ bokeh.model.Model, "A bokeh figure.", ] @@ -79,7 +89,7 @@ def __init__( self, *, gff_gene_type: str, - gff_default_attributes: Tuple[str], + gff_default_attributes: Tuple[str, ...], **kwargs, ): # N.B., this class is designed to work cooperatively, and @@ -139,6 +149,7 @@ def _prep_gff_attributes( attributes_normed = tuple(attributes) return attributes_normed + @check_types @doc( summary="Access genome feature annotations.", returns="A dataframe of genome annotations, one row per feature.", @@ -155,16 +166,12 @@ def genome_features( if region is not None: debug("Handle region.") - resolved_region = resolve_region(self, region) + regions = parse_multi_region(self, region) del region - debug("Normalise to list to simplify concatenation logic.") - if isinstance(resolved_region, Region): - resolved_region = [resolved_region] - debug("Apply region query.") parts = [] - for r in resolved_region: + for r in regions: df_part = self._genome_features_for_contig( contig=r.contig, attributes=attributes_normed ) @@ -203,6 +210,7 @@ def genome_feature_children( return df_children.copy() + @check_types @doc(summary="Plot a transcript, using bokeh.") def plot_transcript( self, @@ -212,7 +220,9 @@ def plot_transcript( height: gplt_params.height = gplt_params.genes_height_default, show: gplt_params.show = True, x_range: Optional[gplt_params.x_range] = None, - toolbar_location: gplt_params.toolbar_location = gplt_params.toolbar_location_default, + toolbar_location: Optional[ + gplt_params.toolbar_location + ] = gplt_params.toolbar_location_default, title: gplt_params.title = True, ) -> gplt_params.figure: debug = self._log.debug @@ -331,24 +341,27 @@ def plot_transcript( return fig + @check_types @doc( summary="Plot a genes track, using bokeh.", ) def plot_genes( self, - region: base_params.region, + region: base_params.single_region, sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, height: gplt_params.genes_height = gplt_params.genes_height_default, show: gplt_params.show = True, - toolbar_location: gplt_params.toolbar_location = gplt_params.toolbar_location_default, + toolbar_location: Optional[ + gplt_params.toolbar_location + ] = gplt_params.toolbar_location_default, x_range: Optional[gplt_params.x_range] = None, title: gplt_params.title = "Genes", ) -> gplt_params.figure: debug = self._log.debug debug("handle region parameter - this determines the genome region to plot") - resolved_region = resolve_region(self, region) + resolved_region: Region = parse_single_region(self, region) del region debug("handle region bounds") diff --git a/malariagen_data/anoph/genome_sequence.py b/malariagen_data/anoph/genome_sequence.py index c19da008e..860556a86 100644 --- a/malariagen_data/anoph/genome_sequence.py +++ b/malariagen_data/anoph/genome_sequence.py @@ -4,7 +4,13 @@ import zarr from numpydoc_decorator import doc -from ..util import Region, da_from_zarr, init_zarr_store, resolve_region +from ..util import ( + Region, + check_types, + da_from_zarr, + init_zarr_store, + parse_single_region, +) from .base import AnophelesBase, base_params @@ -42,6 +48,7 @@ def _genome_ref_id(self) -> str: def _genome_ref_name(self) -> str: return self.config["GENOME_REF_NAME"] + @check_types @doc( summary="Open the reference genome zarr.", returns="Zarr hierarchy containing the reference genome sequence.", @@ -61,6 +68,7 @@ def _genome_sequence_for_contig(self, *, contig, inline_array, chunks): d = da_from_zarr(z, inline_array=inline_array, chunks=chunks) 
return d + @check_types @doc( summary="Access the reference genome sequence.", returns=""" @@ -70,12 +78,12 @@ def _genome_sequence_for_contig(self, *, contig, inline_array, chunks): ) def genome_sequence( self, - region: base_params.region, + region: base_params.single_region, inline_array: base_params.inline_array = base_params.inline_array_default, chunks: base_params.chunks = base_params.chunks_default, ) -> da.Array: # Parse the region parameter into a Region object. - resolved_region: Region = resolve_region(self, region) + resolved_region: Region = parse_single_region(self, region) del region # Obtain complete sequence for the requested contig. diff --git a/malariagen_data/anoph/sample_metadata.py b/malariagen_data/anoph/sample_metadata.py index 760eb2fe6..2e847bece 100644 --- a/malariagen_data/anoph/sample_metadata.py +++ b/malariagen_data/anoph/sample_metadata.py @@ -8,6 +8,7 @@ from numpydoc_decorator import doc from typing_extensions import Annotated, TypeAlias +from ..util import check_types from .base import AnophelesBase, base_params @@ -146,6 +147,7 @@ def _parse_general_metadata( else: raise data + @check_types @doc( summary=""" Read general sample metadata for one or more sample sets into a pandas @@ -167,7 +169,7 @@ def general_metadata( # Fetch all files. N.B., here is an optimisation, this allows us to fetch # multiple files concurrently. - files: Mapping[str, bytes] = self.read_files( + files: Mapping[str, Union[bytes, Exception]] = self.read_files( paths=file_paths.values(), on_error="return" ) @@ -286,6 +288,13 @@ def _parse_cohorts_metadata( else: raise data + def _require_cohorts_analysis(self): + if not self._cohorts_analysis: + raise NotImplementedError( + "Cohorts data not available for this data resource." + ) + + @check_types @doc( summary=""" Access cohort membership metadata for one or more sample sets. @@ -295,11 +304,7 @@ def _parse_cohorts_metadata( def cohorts_metadata( self, sample_sets: Optional[base_params.sample_sets] = None ) -> pd.DataFrame: - # Not all data resources have cohorts metadata. - if not self._cohorts_analysis: - raise NotImplementedError( - "Cohorts metadata not available for this data resource." - ) + self._require_cohorts_analysis() # Normalise input parameters. sample_sets_prepped = self._prep_sample_sets_param(sample_sets=sample_sets) @@ -312,7 +317,7 @@ def cohorts_metadata( # Fetch all files. N.B., here is an optimisation, this allows us to fetch # multiple files concurrently. - files: Mapping[str, bytes] = self.read_files( + files: Mapping[str, Union[bytes, Exception]] = self.read_files( paths=file_paths.values(), on_error="return" ) @@ -379,6 +384,11 @@ def _parse_aim_metadata( else: raise data + def _require_aim_analysis(self): + if not self._aim_analysis: + raise NotImplementedError("AIM data not available for this data resource.") + + @check_types @doc( summary=""" Access ancestry-informative marker (AIM) metadata for one or more @@ -389,11 +399,7 @@ def _parse_aim_metadata( def aim_metadata( self, sample_sets: Optional[base_params.sample_sets] = None ) -> pd.DataFrame: - # Not all data resources have AIM data. - if not self._aim_analysis: - raise NotImplementedError( - "AIM metadata not available for this data resource." - ) + self._require_aim_analysis() # Normalise input parameters. sample_sets_prepped = self._prep_sample_sets_param(sample_sets=sample_sets) @@ -406,7 +412,7 @@ def aim_metadata( # Fetch all files. N.B., here is an optimisation, this allows us to fetch # multiple files concurrently. 
- files: Mapping[str, bytes] = self.read_files( + files: Mapping[str, Union[bytes, Exception]] = self.read_files( paths=file_paths.values(), on_error="return" ) @@ -423,6 +429,7 @@ def aim_metadata( return df_ret + @check_types @doc( summary=""" Add extra sample metadata, e.g., including additional columns @@ -472,6 +479,7 @@ def add_extra_metadata(self, data: pd.DataFrame, on: str = "sample_id"): def clear_extra_metadata(self): self._extra_metadata = [] + @check_types @doc( summary="Access sample metadata for one or more sample sets.", returns="A dataframe of sample metadata, one row per sample.", @@ -480,8 +488,14 @@ def sample_metadata( self, sample_sets: Optional[base_params.sample_sets] = None, sample_query: Optional[base_params.sample_query] = None, + sample_indices: Optional[base_params.sample_indices] = None, ) -> pd.DataFrame: - # Set up for caching. + # Extra parameter checks. + base_params.validate_sample_selection_params( + sample_query=sample_query, sample_indices=sample_indices + ) + + # Normalise parameters. prepped_sample_sets = self._prep_sample_sets_param(sample_sets=sample_sets) del sample_sets cache_key = tuple(prepped_sample_sets) @@ -507,18 +521,19 @@ def sample_metadata( for on, data in self._extra_metadata: df_samples = df_samples.merge(data, how="left", on=on) - # For convenience, apply a query. + # For convenience, apply a sample selection. if sample_query is not None: - if isinstance(sample_query, str): - # Assume a pandas query string. - df_samples = df_samples.query(sample_query) - else: - # Assume it is an indexer. - df_samples = df_samples.iloc[sample_query] + # Assume a pandas query string. + df_samples = df_samples.query(sample_query) + df_samples = df_samples.reset_index(drop=True) + elif sample_indices is not None: + # Assume it is an indexer. + df_samples = df_samples.iloc[sample_indices] df_samples = df_samples.reset_index(drop=True) return df_samples.copy() + @check_types @doc( summary=""" Create a pivot table showing numbers of samples available by space, @@ -559,6 +574,7 @@ def count_samples( return df_pivot + @check_types @doc( summary=""" Plot an interactive map showing sampling locations using ipyleaflet. @@ -578,7 +594,7 @@ def plot_samples_interactive_map( self, sample_sets: Optional[base_params.sample_sets] = None, sample_query: Optional[base_params.sample_query] = None, - basemap: map_params.basemap = map_params.basemap_default, + basemap: Optional[map_params.basemap] = map_params.basemap_default, center: map_params.center = map_params.center_default, zoom: map_params.zoom = map_params.zoom_default, height: map_params.height = map_params.height_default, @@ -639,10 +655,10 @@ def plot_samples_interactive_map( raise ValueError("Basemap abbreviation not recognised:", basemap_str) basemap_provider = basemap_providers_dict[basemap_str] elif basemap is None: - # Default + # Default. basemap_provider = ipyleaflet.basemaps.Esri.WorldImagery else: - # Expect dict or TileProvider or TileLayer + # Expect dict or TileProvider or TileLayer. basemap_provider = basemap # Create a map. 
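The change below moves from ipyleaflet's older add_control() and add_layer() methods to the unified add() method used by newer ipyleaflet releases. A minimal standalone sketch of the same pattern (centre, zoom and marker location are illustrative values):

    import ipyleaflet

    m = ipyleaflet.Map(center=(0.0, 20.0), zoom=3)
    # Replaces m.add_control(...) from the older API.
    m.add(ipyleaflet.ScaleControl(position="bottomleft"))
    # Replaces m.add_layer(...) from the older API.
    m.add(ipyleaflet.Marker(location=(0.0, 20.0), draggable=False))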
@@ -652,7 +668,7 @@ def plot_samples_interactive_map( basemap=basemap_provider, ) scale_control = ipyleaflet.ScaleControl(position="bottomleft") - samples_map.add_control(scale_control) + samples_map.add(scale_control) samples_map.layout.height = height samples_map.layout.width = width @@ -684,10 +700,11 @@ def plot_samples_interactive_map( draggable=False, title=title, ) - samples_map.add_layer(marker) + samples_map.add(marker) return samples_map + @check_types @doc( summary=""" Load a data catalog providing URLs for downloading BAM, VCF and Zarr @@ -716,3 +733,29 @@ def wgs_data_catalog(self, sample_set: base_params.sample_set): ] return df + + def _prep_sample_selection_cache_params( + self, + *, + sample_sets: Optional[base_params.sample_sets], + sample_query: Optional[base_params.sample_query], + sample_indices: Optional[base_params.sample_indices], + ) -> Tuple[List[str], Optional[List[int]]]: + # Normalise sample sets. + sample_sets = self._prep_sample_sets_param(sample_sets=sample_sets) + + if sample_query is not None: + # Resolve query to a list of integers for more cache hits - we + # do this because there are different ways to write the same pandas + # query, and so it's better to evaluate the query and use a list of + # integer indices instead. + df_samples = self.sample_metadata(sample_sets=sample_sets) + loc_samples = df_samples.eval(sample_query).values + sample_indices = np.nonzero(loc_samples)[0].tolist() + + return sample_sets, sample_indices + + def _results_cache_add_analysis_params(self, params: dict): + super()._results_cache_add_analysis_params(params) + params["cohorts_analysis"] = self._cohorts_analysis + params["aim_analysis"] = self._aim_analysis diff --git a/malariagen_data/anoph/snp_data.py b/malariagen_data/anoph/snp_data.py new file mode 100644 index 000000000..65d46ad40 --- /dev/null +++ b/malariagen_data/anoph/snp_data.py @@ -0,0 +1,1293 @@ +from typing import Dict, List, Optional, Tuple, Union + +import allel +import bokeh +import dask.array as da +import numpy as np +import pandas as pd +import xarray as xr +import zarr +from numpydoc_decorator import doc + +from ..util import ( + DIM_ALLELE, + DIM_PLOIDY, + DIM_SAMPLE, + DIM_VARIANT, + CacheMiss, + Region, + check_types, + da_compress, + da_concat, + da_from_zarr, + dask_compress_dataset, + init_zarr_store, + locate_region, + parse_multi_region, + parse_single_region, + simple_xarray_concat, + true_runs, +) +from .base import DEFAULT, base_params +from .genome_features import AnophelesGenomeFeaturesData, gplt_params +from .genome_sequence import AnophelesGenomeSequenceData +from .sample_metadata import AnophelesSampleMetadata + + +class AnophelesSnpData( + AnophelesSampleMetadata, AnophelesGenomeFeaturesData, AnophelesGenomeSequenceData +): + def __init__( + self, + site_filters_analysis: Optional[str] = None, + default_site_mask: Optional[str] = None, + **kwargs, + ): + # N.B., this class is designed to work cooperatively, and + # so it's important that any remaining parameters are passed + # to the superclass constructor. + super().__init__(**kwargs) + + # If provided, this analysis version will override the + # default value provided in the release configuration. + self._site_filters_analysis_override = site_filters_analysis + + # These will vary between data resources. + self._default_site_mask = default_site_mask + + # Set up caches. 
+ # TODO review type annotations here, maybe can tighten
+ self._cache_snp_sites = None
+ self._cache_snp_genotypes: Dict = dict()
+ self._cache_site_filters: Dict = dict()
+ self._cache_site_annotations = None
+ self._cache_locate_site_class: Dict = dict()
+
+ @property
+ def _site_filters_analysis(self) -> Optional[str]:
+ if self._site_filters_analysis_override:
+ return self._site_filters_analysis_override
+ else:
+ # N.B., this will return None if the key is not present in the
+ # config.
+ return self.config.get("DEFAULT_SITE_FILTERS_ANALYSIS")
+
+ @property
+ def site_mask_ids(self) -> Tuple[str, ...]:
+ """Identifiers for the different site masks that are available.
+ These are values that can be used for the `site_mask` parameter in any
+ method making use of SNP data.
+
+ """
+ return tuple(self.config.get("SITE_MASK_IDS", ())) # ensure tuple
+
+ @property
+ def _site_annotations_zarr_path(self) -> str:
+ return self.config["SITE_ANNOTATIONS_ZARR_PATH"]
+
+ def _prep_site_mask_param(
+ self,
+ *,
+ site_mask: base_params.site_mask,
+ ) -> base_params.site_mask:
+ if site_mask == DEFAULT:
+ # Use whatever is the default site mask for this data resource.
+ assert self._default_site_mask is not None
+ return self._default_site_mask
+ elif site_mask in self.site_mask_ids:
+ return site_mask
+ else:
+ raise ValueError(
+ f"Invalid site mask, must be one of {self.site_mask_ids}."
+ )
+
+ def _prep_optional_site_mask_param(
+ self,
+ *,
+ site_mask: Optional[base_params.site_mask],
+ ) -> Optional[base_params.site_mask]:
+ if site_mask is None:
+ # This is allowed; it means don't apply any site mask to the data.
+ return None
+ else:
+ return self._prep_site_mask_param(site_mask=site_mask)
+
+ @doc(
+ summary="Open SNP sites zarr.",
+ returns="Zarr hierarchy.",
+ )
+ def open_snp_sites(self) -> zarr.hierarchy.Group:
+ # Here we cache the opened zarr hierarchy, to avoid small delays
+ # reading zarr metadata.
+ if self._cache_snp_sites is None:
+ path = (
+ f"{self._base_path}/{self._major_version_path}/snp_genotypes/all/sites/"
+ )
+ store = init_zarr_store(fs=self._fs, path=path)
+ root = zarr.open_consolidated(store=store)
+ self._cache_snp_sites = root
+ return self._cache_snp_sites
+
+ @check_types
+ @doc(
+ summary="Open SNP genotypes zarr for a given sample set.",
+ returns="Zarr hierarchy.",
+ )
+ def open_snp_genotypes(
+ self, sample_set: base_params.sample_set
+ ) -> zarr.hierarchy.Group:
+ # Here we cache the opened zarr hierarchy, to avoid small delays
+ # reading zarr metadata.
+ try:
+ return self._cache_snp_genotypes[sample_set]
+ except KeyError:
+ release = self.lookup_release(sample_set=sample_set)
+ release_path = self._release_to_path(release)
+ path = f"{self._base_path}/{release_path}/snp_genotypes/all/{sample_set}/"
+ store = init_zarr_store(fs=self._fs, path=path)
+ root = zarr.open_consolidated(store=store)
+ self._cache_snp_genotypes[sample_set] = root
+ return root
+
+ def _require_site_filters_analysis(self):
+ if not self._site_filters_analysis:
+ raise NotImplementedError(
+ "Site filters not available for this data resource."
+ )
+
+ @check_types
+ @doc(
+ summary="Open site filters zarr.",
+ returns="Zarr hierarchy.",
+ )
+ def open_site_filters(
+ self,
+ mask: base_params.site_mask,
+ ) -> zarr.hierarchy.Group:
+ self._require_site_filters_analysis()
+ mask = self._prep_site_mask_param(site_mask=mask)
+
+ # Here we cache the opened zarr hierarchy, to avoid small delays
+ # reading zarr metadata.
+ try: + return self._cache_site_filters[mask] + except KeyError: + path = f"{self._base_path}/{self._major_version_path}/site_filters/{self._site_filters_analysis}/{mask}/" + store = init_zarr_store(fs=self._fs, path=path) + root = zarr.open_consolidated(store=store) + self._cache_site_filters[mask] = root + return root + + @doc( + summary="Open site annotations zarr.", + returns="Zarr hierarchy.", + ) + def open_site_annotations(self) -> zarr.hierarchy.Group: + if self._cache_site_annotations is None: + path = f"{self._base_path}/{self._site_annotations_zarr_path}" + store = init_zarr_store(fs=self._fs, path=path) + self._cache_site_annotations = zarr.open_consolidated(store=store) + return self._cache_site_annotations + + def _site_filters_for_region( + self, + *, + region: Region, + mask: base_params.site_mask, + field: base_params.field, + inline_array: base_params.inline_array, + chunks: base_params.chunks, + ): + root = self.open_site_filters(mask=mask) + z = root[f"{region.contig}/variants/{field}"] + d = da_from_zarr(z, inline_array=inline_array, chunks=chunks) + if region.start or region.end: + root = self.open_snp_sites() + pos = root[f"{region.contig}/variants/POS"][:] + loc_region = locate_region(region, pos) + d = d[loc_region] + return d + + @check_types + @doc( + summary="Access SNP site filters.", + returns=""" + An array of boolean values identifying sites that pass the filters. + """, + ) + def site_filters( + self, + region: base_params.region, + mask: base_params.site_mask, + field: base_params.field = "filter_pass", + inline_array: base_params.inline_array = base_params.inline_array_default, + chunks: base_params.chunks = base_params.chunks_default, + ) -> da.Array: + # Resolve the region parameter to a standard type. + regions: List[Region] = parse_multi_region(self, region) + del region + + # Load arrays and concatenate if needed. + d = da_concat( + [ + self._site_filters_for_region( + region=r, + mask=mask, + field=field, + inline_array=inline_array, + chunks=chunks, + ) + for r in regions + ] + ) + + return d + + def _snp_sites_for_contig( + self, + *, + contig: base_params.contig, + field: base_params.field, + inline_array: base_params.inline_array, + chunks: base_params.chunks, + ) -> da.Array: + """Access SNP sites data for a single contig.""" + root = self.open_snp_sites() + z = root[f"{contig}/variants/{field}"] + ret = da_from_zarr(z, inline_array=inline_array, chunks=chunks) + return ret + + def _snp_sites_for_region( + self, + *, + region: Region, + field: base_params.field, + inline_array: base_params.inline_array, + chunks: base_params.chunks, + ) -> da.Array: + # Access data for the requested contig. + ret = self._snp_sites_for_contig( + contig=region.contig, field=field, inline_array=inline_array, chunks=chunks + ) + + # Deal with a region. + if region.start or region.end: + if field == "POS": + pos = ret + else: + pos = self._snp_sites_for_contig( + contig=region.contig, + field="POS", + inline_array=inline_array, + chunks=chunks, + ) + loc_region = locate_region(region, np.asarray(pos)) + ret = ret[loc_region] + + return ret + + @check_types + @doc( + summary="Access SNP site data (positions or alleles).", + returns=""" + An array of either SNP positions ("POS"), reference alleles ("REF") or + alternate alleles ("ALT"). 
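+ For example, snp_sites(region="2L", field="POS") would return the
+ positions of all SNP sites on contig 2L (illustrative call).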
+ """, + ) + def snp_sites( + self, + region: base_params.region, + field: base_params.field, + site_mask: Optional[base_params.site_mask] = None, + inline_array: base_params.inline_array = base_params.inline_array_default, + chunks: base_params.chunks = base_params.chunks_default, + ) -> da.Array: + # Resolve the region parameter to a standard type. + regions: List[Region] = parse_multi_region(self, region) + del region + + # Access SNP sites and concatenate over regions. + ret = da_concat( + [ + self._snp_sites_for_region( + region=r, + field=field, + chunks=chunks, + inline_array=inline_array, + ) + for r in regions + ], + axis=0, + ) + + # Apply site mask if requested. + if site_mask is not None: + loc_sites = self.site_filters( + region=regions, + mask=site_mask, + chunks=chunks, + inline_array=inline_array, + ) + ret = da_compress(loc_sites, ret, axis=0) + + return ret + + def _snp_genotypes_for_contig( + self, + *, + contig: base_params.contig, + sample_set: base_params.sample_set, + field: base_params.field, + inline_array: base_params.inline_array, + chunks: base_params.chunks, + ) -> da.Array: + """Access SNP genotypes for a single contig and a single sample set.""" + root = self.open_snp_genotypes(sample_set=sample_set) + z = root[f"{contig}/calldata/{field}"] + d = da_from_zarr(z, inline_array=inline_array, chunks=chunks) + return d + + @check_types + @doc( + summary="Access SNP genotypes and associated data.", + returns=""" + An array of either genotypes (GT), genotype quality (GQ), allele + depths (AD) or mapping quality (MQ) values. + """, + ) + def snp_genotypes( + self, + region: base_params.region, + sample_sets: Optional[base_params.sample_sets] = None, + sample_query: Optional[base_params.sample_query] = None, + sample_indices: Optional[base_params.sample_indices] = None, + field: base_params.field = "GT", + site_mask: Optional[base_params.site_mask] = None, + inline_array: base_params.inline_array = base_params.inline_array_default, + chunks: base_params.chunks = base_params.chunks_default, + ) -> da.Array: + # Additional parameter checks. + base_params.validate_sample_selection_params( + sample_query=sample_query, sample_indices=sample_indices + ) + + # Normalise parameters. + sample_sets = self._prep_sample_sets_param(sample_sets=sample_sets) + regions: List[Region] = parse_multi_region(self, region) + del region + + # Concatenate multiple sample sets and/or contigs. + lx = [] + for r in regions: + contig = r.contig + ly = [] + + for s in sample_sets: + y = self._snp_genotypes_for_contig( + contig=contig, + sample_set=s, + field=field, + inline_array=inline_array, + chunks=chunks, + ) + ly.append(y) + + # Concatenate data from multiple sample sets. + x = da_concat(ly, axis=1) + + # Locate region - do this only once, optimisation. + if r.start or r.end: + pos = self._snp_sites_for_contig( + contig=contig, field="POS", inline_array=inline_array, chunks=chunks + ) + loc_region = locate_region(r, np.asarray(pos)) + x = x[loc_region] + + lx.append(x) + + # Concatenate data from multiple regions. + d = da_concat(lx, axis=0) + + # Apply site filters if requested. + if site_mask is not None: + loc_sites = self.site_filters( + region=regions, + mask=site_mask, + ) + d = da_compress(loc_sites, d, axis=0) + + # Apply sample selection if requested. 
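+ # E.g. (illustrative values), sample_query="country == 'Burkina Faso'"
+ # selects samples via a pandas query against the sample metadata,
+ # whereas sample_indices=[0, 5, 7] selects samples by position; the
+ # two parameters are mutually exclusive.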
+ if sample_query is not None:
+ df_samples = self.sample_metadata(sample_sets=sample_sets)
+ loc_samples = df_samples.eval(sample_query).values
+ d = da.compress(loc_samples, d, axis=1)
+ elif sample_indices is not None:
+ d = da.take(d, sample_indices, axis=1)
+
+ return d
+
+ def _snp_variants_for_contig(
+ self,
+ *,
+ contig: base_params.contig,
+ inline_array: base_params.inline_array,
+ chunks: base_params.chunks,
+ ):
+ coords = dict()
+ data_vars = dict()
+ sites_root = self.open_snp_sites()
+
+ # Set up variant_position.
+ pos_z = sites_root[f"{contig}/variants/POS"]
+ variant_position = da_from_zarr(pos_z, inline_array=inline_array, chunks=chunks)
+ coords["variant_position"] = [DIM_VARIANT], variant_position
+
+ # Set up variant_allele.
+ ref_z = sites_root[f"{contig}/variants/REF"]
+ alt_z = sites_root[f"{contig}/variants/ALT"]
+ ref = da_from_zarr(ref_z, inline_array=inline_array, chunks=chunks)
+ alt = da_from_zarr(alt_z, inline_array=inline_array, chunks=chunks)
+ variant_allele = da.concatenate([ref[:, None], alt], axis=1)
+ data_vars["variant_allele"] = [DIM_VARIANT, DIM_ALLELE], variant_allele
+
+ # Set up variant_contig.
+ contig_index = self.contigs.index(contig)
+ variant_contig = da.full_like(
+ variant_position, fill_value=contig_index, dtype="u1"
+ )
+ coords["variant_contig"] = [DIM_VARIANT], variant_contig
+
+ # Set up site filters arrays.
+ for mask in self.site_mask_ids:
+ filters_root = self.open_site_filters(mask=mask)
+ z = filters_root[f"{contig}/variants/filter_pass"]
+ d = da_from_zarr(z, inline_array=inline_array, chunks=chunks)
+ data_vars[f"variant_filter_pass_{mask}"] = [DIM_VARIANT], d
+
+ # Set up attributes.
+ attrs = {"contigs": self.contigs}
+
+ # Create a dataset.
+ ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs)
+
+ return ds
+
+ @check_types
+ @doc(
+ summary="Access SNP sites and site filters.",
+ returns="A dataset containing SNP sites and site filters.",
+ )
+ def snp_variants(
+ self,
+ region: base_params.region,
+ site_mask: Optional[base_params.site_mask] = None,
+ inline_array: base_params.inline_array = base_params.inline_array_default,
+ chunks: base_params.chunks = base_params.chunks_default,
+ ):
+ # Normalise parameters.
+ regions: List[Region] = parse_multi_region(self, region)
+ del region
+
+ # Access SNP data and concatenate multiple regions.
+ lx = []
+ for r in regions:
+ # Access variants.
+ x = self._snp_variants_for_contig(
+ contig=r.contig,
+ inline_array=inline_array,
+ chunks=chunks,
+ )
+
+ # Handle region.
+ if r.start or r.end:
+ pos = x["variant_position"].values
+ loc_region = locate_region(r, pos)
+ x = x.isel(variants=loc_region)
+
+ lx.append(x)
+
+ # Concatenate data from multiple regions.
+ ds = simple_xarray_concat(lx, dim=DIM_VARIANT)
+
+ # Apply site filters.
+ if site_mask is not None:
+ ds = dask_compress_dataset(
+ ds, indexer=f"variant_filter_pass_{site_mask}", dim=DIM_VARIANT
+ )
+
+ return ds
+
+ @check_types
+ @doc(
+ summary="Load site annotations.",
+ returns="A dataset of site annotations.",
+ )
+ def site_annotations(
+ self,
+ region: base_params.single_region,
+ site_mask: Optional[base_params.site_mask] = None,
+ inline_array: base_params.inline_array = base_params.inline_array_default,
+ chunks: base_params.chunks = base_params.chunks_default,
+ ) -> xr.Dataset:
+ # N.B., passing chunks="auto" here may give better performance.
+
+ # Resolve region.
+ resolved_region: Region = parse_single_region(self, region) + del region + contig = resolved_region.contig + + # Open site annotations zarr. + root = self.open_site_annotations() + + # Build a dataset. + ds = xr.Dataset() + for field in ( + "codon_degeneracy", + "codon_nonsyn", + "codon_position", + "seq_cls", + "seq_flen", + "seq_relpos_start", + "seq_relpos_stop", + ): + data = da_from_zarr( + root[field][contig], + inline_array=inline_array, + chunks=chunks, + ) + ds[field] = "variants", data + + # Subset to SNP positions. + pos = self.snp_sites( + region=contig, + field="POS", + site_mask=site_mask, + inline_array=inline_array, + chunks=chunks, + ) + pos = pos.compute() + if resolved_region.start or resolved_region.end: + loc_region = locate_region(resolved_region, pos) + pos = pos[loc_region] + idx = pos - 1 + ds = ds.isel(variants=idx) + + return ds + + def _locate_site_class( + self, + *, + region: base_params.single_region, + site_mask: Optional[base_params.site_mask], + site_class: base_params.site_class, + ): + # Cache these data in memory to avoid repeated computation. + cache_key = (region, site_mask, site_class) + + try: + loc_ann = self._cache_locate_site_class[cache_key] + + except KeyError: + # Access site annotations data. + ds_ann = self.site_annotations( + region=region, + site_mask=site_mask, + ) + codon_pos = ds_ann["codon_position"].data + codon_deg = ds_ann["codon_degeneracy"].data + seq_cls = ds_ann["seq_cls"].data + seq_flen = ds_ann["seq_flen"].data + seq_relpos_start = ds_ann["seq_relpos_start"].data + seq_relpos_stop = ds_ann["seq_relpos_stop"].data + site_class = site_class.upper() + + # Define constants used in site annotations data. + SEQ_CLS_UNKNOWN = 0 # noqa + SEQ_CLS_UPSTREAM = 1 + SEQ_CLS_DOWNSTREAM = 2 + SEQ_CLS_5UTR = 3 + SEQ_CLS_3UTR = 4 + SEQ_CLS_CDS_FIRST = 5 + SEQ_CLS_CDS_MID = 6 + SEQ_CLS_CDS_LAST = 7 + SEQ_CLS_INTRON_FIRST = 8 + SEQ_CLS_INTRON_MID = 9 + SEQ_CLS_INTRON_LAST = 10 + CODON_DEG_UNKNOWN = 0 # noqa + CODON_DEG_0 = 1 + CODON_DEG_2_SIMPLE = 2 + CODON_DEG_2_COMPLEX = 3 # noqa + CODON_DEG_4 = 4 + + # Set up site selection. 
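+ # Each site class below is a boolean combination of the annotation
+ # arrays. N.B., codon_position appears to be 0-based, so
+ # codon_pos == 2 selects the third position of a codon, where 4-fold
+ # degeneracy can occur.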
+ + if site_class == "CDS_DEG_4": + # 4-fold degenerate coding sites + loc_ann = ( + ( + (seq_cls == SEQ_CLS_CDS_FIRST) + | (seq_cls == SEQ_CLS_CDS_MID) + | (seq_cls == SEQ_CLS_CDS_LAST) + ) + & (codon_pos == 2) + & (codon_deg == CODON_DEG_4) + ) + + elif site_class == "CDS_DEG_2_SIMPLE": + # 2-fold degenerate coding sites + loc_ann = ( + ( + (seq_cls == SEQ_CLS_CDS_FIRST) + | (seq_cls == SEQ_CLS_CDS_MID) + | (seq_cls == SEQ_CLS_CDS_LAST) + ) + & (codon_pos == 2) + & (codon_deg == CODON_DEG_2_SIMPLE) + ) + + elif site_class == "CDS_DEG_0": + # non-degenerate coding sites + loc_ann = ( + (seq_cls == SEQ_CLS_CDS_FIRST) + | (seq_cls == SEQ_CLS_CDS_MID) + | (seq_cls == SEQ_CLS_CDS_LAST) + ) & (codon_deg == CODON_DEG_0) + + elif site_class == "INTRON_SHORT": + # short introns, excluding splice regions + loc_ann = ( + ( + (seq_cls == SEQ_CLS_INTRON_FIRST) + | (seq_cls == SEQ_CLS_INTRON_MID) + | (seq_cls == SEQ_CLS_INTRON_LAST) + ) + & (seq_flen < 100) + & (seq_relpos_start > 10) + & (seq_relpos_stop > 10) + ) + + elif site_class == "INTRON_LONG": + # long introns, excluding splice regions + loc_ann = ( + ( + (seq_cls == SEQ_CLS_INTRON_FIRST) + | (seq_cls == SEQ_CLS_INTRON_MID) + | (seq_cls == SEQ_CLS_INTRON_LAST) + ) + & (seq_flen > 200) + & (seq_relpos_start > 10) + & (seq_relpos_stop > 10) + ) + + elif site_class == "INTRON_SPLICE_5PRIME": + # 5' intron splice regions + loc_ann = ( + (seq_cls == SEQ_CLS_INTRON_FIRST) + | (seq_cls == SEQ_CLS_INTRON_MID) + | (seq_cls == SEQ_CLS_INTRON_LAST) + ) & (seq_relpos_start < 2) + + elif site_class == "INTRON_SPLICE_3PRIME": + # 3' intron splice regions + loc_ann = ( + (seq_cls == SEQ_CLS_INTRON_FIRST) + | (seq_cls == SEQ_CLS_INTRON_MID) + | (seq_cls == SEQ_CLS_INTRON_LAST) + ) & (seq_relpos_stop < 2) + + elif site_class == "UTR_5PRIME": + # 5' UTR + loc_ann = seq_cls == SEQ_CLS_5UTR + + elif site_class == "UTR_3PRIME": + # 3' UTR + loc_ann = seq_cls == SEQ_CLS_3UTR + + elif site_class == "INTERGENIC": + # intergenic regions, distant from a gene + loc_ann = ( + (seq_cls == SEQ_CLS_UPSTREAM) & (seq_relpos_stop > 10_000) + ) | ((seq_cls == SEQ_CLS_DOWNSTREAM) & (seq_relpos_start > 10_000)) + + else: + raise NotImplementedError(site_class) + + # Compute site selection. + with self._dask_progress(desc=f"Locate {site_class} sites"): + loc_ann = loc_ann.compute() + + self._cache_locate_site_class[cache_key] = loc_ann + + return loc_ann + + def _snp_calls_for_contig( + self, + *, + contig: base_params.contig, + sample_set: base_params.sample_set, + inline_array: base_params.inline_array, + chunks: base_params.chunks, + ) -> xr.Dataset: + coords = dict() + data_vars = dict() + + # Set up call arrays. + calls_root = self.open_snp_genotypes(sample_set=sample_set) + gt_z = calls_root[f"{contig}/calldata/GT"] + call_genotype = da_from_zarr(gt_z, inline_array=inline_array, chunks=chunks) + gq_z = calls_root[f"{contig}/calldata/GQ"] + call_gq = da_from_zarr(gq_z, inline_array=inline_array, chunks=chunks) + ad_z = calls_root[f"{contig}/calldata/AD"] + call_ad = da_from_zarr(ad_z, inline_array=inline_array, chunks=chunks) + mq_z = calls_root[f"{contig}/calldata/MQ"] + call_mq = da_from_zarr(mq_z, inline_array=inline_array, chunks=chunks) + data_vars["call_genotype"] = ( + [DIM_VARIANT, DIM_SAMPLE, DIM_PLOIDY], + call_genotype, + ) + data_vars["call_GQ"] = ([DIM_VARIANT, DIM_SAMPLE], call_gq) + data_vars["call_MQ"] = ([DIM_VARIANT, DIM_SAMPLE], call_mq) + data_vars["call_AD"] = ( + [DIM_VARIANT, DIM_SAMPLE, DIM_ALLELE], + call_ad, + ) + + # Set up sample arrays. 
+ z = calls_root["samples"] + sample_id = da_from_zarr(z, inline_array=inline_array, chunks=chunks) + # Decode to unicode strings, as it is stored as bytes objects. + sample_id = sample_id.astype("U") + coords["sample_id"] = [DIM_SAMPLE], sample_id + + # Create a dataset. + ds = xr.Dataset(data_vars=data_vars, coords=coords) + + return ds + + @check_types + @doc( + summary="Access SNP sites, site filters and genotype calls.", + returns="A dataset containing SNP sites, site filters and genotype calls.", + ) + def snp_calls( + self, + region: base_params.region, + sample_sets: Optional[base_params.sample_sets] = None, + sample_query: Optional[base_params.sample_query] = None, + sample_indices: Optional[base_params.sample_indices] = None, + site_mask: Optional[base_params.site_mask] = None, + site_class: Optional[base_params.site_class] = None, + inline_array: base_params.inline_array = base_params.inline_array_default, + chunks: base_params.chunks = base_params.chunks_default, + cohort_size: Optional[base_params.cohort_size] = None, + min_cohort_size: Optional[base_params.min_cohort_size] = None, + max_cohort_size: Optional[base_params.max_cohort_size] = None, + random_seed: base_params.random_seed = 42, + ) -> xr.Dataset: + # Additional parameter checks. + base_params.validate_sample_selection_params( + sample_query=sample_query, sample_indices=sample_indices + ) + + # Normalise parameters. + sample_sets_prepped: List[str] = self._prep_sample_sets_param( + sample_sets=sample_sets + ) + del sample_sets + regions: List[Region] = parse_multi_region(self, region) + del region + + # Access SNP calls and concatenate multiple sample sets and/or regions. + lx = [] + for r in regions: + ly = [] + for s in sample_sets_prepped: + y = self._snp_calls_for_contig( + contig=r.contig, + sample_set=s, + inline_array=inline_array, + chunks=chunks, + ) + ly.append(y) + + # Concatenate data from multiple sample sets. + x = simple_xarray_concat(ly, dim=DIM_SAMPLE) + + # Add variants variables. + v = self._snp_variants_for_contig( + contig=r.contig, inline_array=inline_array, chunks=chunks + ) + x = xr.merge([v, x], compat="override", join="override") + + # Handle site class. + if site_class is not None: + loc_ann = self._locate_site_class( + region=r.contig, + site_class=site_class, + site_mask=None, + ) + x = x.isel(variants=loc_ann) + + # Handle region, do this only once - optimisation. + if r.start or r.end: + pos = x["variant_position"].values + loc_region = locate_region(r, pos) + x = x.isel(variants=loc_region) + + lx.append(x) + + # Concatenate data from multiple regions. + ds = simple_xarray_concat(lx, dim=DIM_VARIANT) + + if site_mask is not None: + # Apply site filters. + ds = dask_compress_dataset( + ds, indexer=f"variant_filter_pass_{site_mask}", dim=DIM_VARIANT + ) + + # Add call_genotype_mask. + ds["call_genotype_mask"] = ds["call_genotype"] < 0 + + # Handle sample selection. + if sample_query is not None: + df_samples = self.sample_metadata(sample_sets=sample_sets_prepped) + loc_samples = df_samples.eval(sample_query).values + if np.count_nonzero(loc_samples) == 0: + raise ValueError(f"No samples found for query {sample_query!r}") + ds = ds.isel(samples=loc_samples) + elif sample_indices is not None: + ds = ds.isel(samples=sample_indices) + + # Handle cohort size, overrides min and max. + if cohort_size is not None: + min_cohort_size = cohort_size + max_cohort_size = cohort_size + + # Handle min cohort size. 
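+ # E.g. (illustrative), with cohort_size=10 both bounds are set to 10,
+ # so fewer than 10 samples raises an error and more than 10 are
+ # randomly down-sampled to exactly 10.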
+ if min_cohort_size is not None:
+ n_samples = ds.dims["samples"]
+ if n_samples < min_cohort_size:
+ raise ValueError(
+ f"not enough samples ({n_samples}) for minimum cohort size ({min_cohort_size})"
+ )
+
+ # Handle max cohort size.
+ if max_cohort_size is not None:
+ n_samples = ds.dims["samples"]
+ if n_samples > max_cohort_size:
+ rng = np.random.default_rng(seed=random_seed)
+ loc_downsample = rng.choice(
+ n_samples, size=max_cohort_size, replace=False
+ )
+ loc_downsample.sort()
+ ds = ds.isel(samples=loc_downsample)
+
+ return ds
+
+ def snp_dataset(self, *args, **kwargs):
+ """Deprecated; this method has been renamed to snp_calls()."""
+ return self.snp_calls(*args, **kwargs)
+
+ def _prep_region_cache_param(
+ self, *, region: base_params.region
+ ) -> Union[dict, List[dict]]:
+ """Obtain a normalised representation of a region parameter which can
+ be used with the results cache."""
+
+ # N.B., we need to convert to a dict, because the cache saves params
+ # as JSON.
+
+ region_prepped: List[Region] = parse_multi_region(self, region)
+ if len(region_prepped) > 1:
+ ret = [r.to_dict() for r in region_prepped]
+ else:
+ ret = region_prepped[0].to_dict()
+ return ret
+
+ def _results_cache_add_analysis_params(self, params: dict):
+ super()._results_cache_add_analysis_params(params)
+ params["site_filters_analysis"] = self._site_filters_analysis
+
+ def _snp_allele_counts(
+ self,
+ *,
+ region,
+ sample_sets,
+ sample_indices,
+ site_mask,
+ site_class,
+ cohort_size,
+ random_seed,
+ ):
+ # Access SNP calls.
+ ds_snps = self.snp_calls(
+ region=region,
+ sample_sets=sample_sets,
+ sample_indices=sample_indices,
+ site_mask=site_mask,
+ site_class=site_class,
+ cohort_size=cohort_size,
+ random_seed=random_seed,
+ )
+ gt = ds_snps["call_genotype"]
+
+ # Set up and run allele counts computation.
+ gt = allel.GenotypeDaskArray(gt.data)
+ ac = gt.count_alleles(max_allele=3)
+ with self._dask_progress(desc="Compute SNP allele counts"):
+ ac = ac.compute()
+
+ # Return plain numpy array.
+ results = dict(ac=ac.values)
+
+ return results
+
+ @check_types
+ @doc(
+ summary="""
+ Compute SNP allele counts. This returns the number of times each
+ SNP allele was observed in the selected samples.
+ """,
+ returns="""
+ A numpy array of shape (n_variants, 4), where the first column has
+ the reference allele (0) counts, the second column has the first
+ alternate allele (1) counts, the third column has the second
+ alternate allele (2) counts, and the fourth column has the third
+ alternate allele (3) counts.
+ """,
+ notes="""
+ This computation may take some time to run, depending on your
+ computing environment. Results of this computation will be cached
+ and re-used if the `results_cache` parameter was set when
+ instantiating the class.
+ """,
+ )
+ def snp_allele_counts(
+ self,
+ region: base_params.region,
+ sample_sets: Optional[base_params.sample_sets] = None,
+ sample_query: Optional[base_params.sample_query] = None,
+ sample_indices: Optional[base_params.sample_indices] = None,
+ site_mask: Optional[base_params.site_mask] = None,
+ site_class: Optional[base_params.site_class] = None,
+ cohort_size: Optional[base_params.cohort_size] = None,
+ random_seed: base_params.random_seed = 42,
+ ) -> np.ndarray:
+ # Change this name if you ever change the behaviour of this function,
+ # to invalidate any previously cached data.
+ name = "snp_allele_counts_v2"
+
+ # Normalize params for consistent hash value.
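+ # Equivalent parameter values should map to the same cache key, e.g.,
+ # a sample query is resolved to the list of sample indices it selects.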
+ ( + sample_sets_prepped, + sample_indices_prepped, + ) = self._prep_sample_selection_cache_params( + sample_sets=sample_sets, + sample_query=sample_query, + sample_indices=sample_indices, + ) + region_prepped = self._prep_region_cache_param(region=region) + site_mask_prepped = self._prep_optional_site_mask_param(site_mask=site_mask) + params = dict( + region=region_prepped, + sample_sets=sample_sets_prepped, + sample_indices=sample_indices_prepped, + site_mask=site_mask_prepped, + site_class=site_class, + cohort_size=cohort_size, + random_seed=random_seed, + ) + + try: + results = self.results_cache_get(name=name, params=params) + + except CacheMiss: + results = self._snp_allele_counts(**params) + self.results_cache_set(name=name, params=params, results=results) + + ac = results["ac"] + return ac + + @check_types + @doc( + summary=""" + Plot SNPs in a given genome region. SNPs are shown as rectangles, + with segregating and non-segregating SNPs positioned on different levels, + and coloured by site filter. + """, + parameters=dict( + max_snps="Maximum number of SNPs to show.", + ), + ) + def plot_snps( + self, + region: base_params.region, + sample_sets: Optional[base_params.sample_sets] = None, + sample_query: Optional[base_params.sample_query] = None, + site_mask: base_params.site_mask = DEFAULT, + cohort_size: Optional[base_params.cohort_size] = None, + sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, + width: gplt_params.width = gplt_params.width_default, + track_height: gplt_params.height = 80, + genes_height: gplt_params.genes_height = gplt_params.genes_height_default, + max_snps: int = 200_000, + show: gplt_params.show = True, + ) -> gplt_params.figure: + # Plot SNPs track. + fig1 = self.plot_snps_track( + region=region, + sample_sets=sample_sets, + sample_query=sample_query, + site_mask=site_mask, + cohort_size=cohort_size, + sizing_mode=sizing_mode, + width=width, + height=track_height, + max_snps=max_snps, + show=False, + ) + fig1.xaxis.visible = False + + # Plot genes track. + fig2 = self.plot_genes( + region=region, + sizing_mode=sizing_mode, + width=width, + height=genes_height, + x_range=fig1.x_range, + show=False, + ) + + # Layout tracks in a grid. + fig = bokeh.layouts.gridplot( + [fig1, fig2], + ncols=1, + toolbar_location="above", + merge_tools=True, + sizing_mode=sizing_mode, + ) + + if show: + bokeh.plotting.show(fig) + + return fig + + @check_types + @doc( + summary=""" + Plot SNPs in a given genome region. SNPs are shown as rectangles, + with segregating and non-segregating SNPs positioned on different levels, + and coloured by site filter. + """, + parameters=dict( + max_snps="Maximum number of SNPs to show.", + ), + ) + def plot_snps_track( + self, + region: base_params.single_region, + sample_sets: Optional[base_params.sample_sets] = None, + sample_query: Optional[base_params.sample_query] = None, + site_mask: base_params.site_mask = DEFAULT, + cohort_size: Optional[base_params.cohort_size] = None, + sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, + width: gplt_params.width = gplt_params.width_default, + height: gplt_params.height = 120, + max_snps: int = 200_000, + x_range: Optional[gplt_params.x_range] = None, + show: gplt_params.show = True, + ) -> gplt_params.figure: + site_mask = self._prep_site_mask_param(site_mask=site_mask) + + # Resolve and check region. 
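+ # This track plots individual SNPs, so only a single bounded region is
+ # supported; the check below guards against plotting too many sites.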
+ resolved_region: Region = parse_single_region(self, region) + del region + + if ( + (resolved_region.start is None) + or (resolved_region.end is None) + or ((resolved_region.end - resolved_region.start) > max_snps) + ): + raise ValueError("Region is too large, please provide a smaller region.") + + # Compute allele counts. + ac = allel.AlleleCountsArray( + self.snp_allele_counts( + region=resolved_region, + sample_sets=sample_sets, + sample_query=sample_query, + site_mask=None, + cohort_size=cohort_size, + ) + ) + an = ac.sum(axis=1) + is_seg = ac.is_segregating() + is_var = ac.is_variant() + allelism = ac.allelism() + + # Obtain SNP variants data. + ds_sites = self.snp_variants( + region=resolved_region, + ).compute() + + # Build a dataframe. + pos = ds_sites["variant_position"].values + alleles = ds_sites["variant_allele"].values.astype("U") + cols = { + "pos": pos, + "allele_0": alleles[:, 0], + "allele_1": alleles[:, 1], + "allele_2": alleles[:, 2], + "allele_3": alleles[:, 3], + "ac_0": ac[:, 0], + "ac_1": ac[:, 1], + "ac_2": ac[:, 2], + "ac_3": ac[:, 3], + "an": an, + "is_seg": is_seg, + "is_var": is_var, + "allelism": allelism, + } + + for site_mask_id in self.site_mask_ids: + cols[f"pass_{site_mask_id}"] = ds_sites[ + f"variant_filter_pass_{site_mask_id}" + ].values + + data = pd.DataFrame(cols) + + # Find gaps in the reference genome. + seq = self.genome_sequence(region=resolved_region.contig).compute() + is_n = (seq == b"N") | (seq == b"n") + n_starts, n_stops = true_runs(is_n) + + # Create figure. + xwheel_zoom = bokeh.models.WheelZoomTool( + dimensions="width", maintain_focus=False + ) + pos = data["pos"].values + x_min = resolved_region.start or 1 + x_max = resolved_region.end or len(seq) + if x_range is None: + x_range = bokeh.models.Range1d(x_min, x_max, bounds="auto") + + tooltips = [ + ("Position", "$x{0,0}"), + ( + "Alleles", + "@allele_0 (@ac_0), @allele_1 (@ac_1), @allele_2 (@ac_2), @allele_3 (@ac_3)", + ), + ("No. alleles", "@allelism"), + ("Allele calls", "@an"), + ] + + for site_mask_id in self.site_mask_ids: + tooltips.append((f"Pass {site_mask_id}", f"@pass_{site_mask_id}")) + + fig = bokeh.plotting.figure( + title="SNPs", + tools=["xpan", "xzoom_in", "xzoom_out", xwheel_zoom, "reset"], + active_scroll=xwheel_zoom, + active_drag="xpan", + sizing_mode=sizing_mode, + width=width, + height=height, + toolbar_location="above", + x_range=x_range, + y_range=(0.5, 2.5), + tooltips=tooltips, + ) + hover_tool = fig.select(type=bokeh.models.HoverTool) + hover_tool.names = ["snps"] + + # Plot gaps in the reference genome. + df_n_runs = pd.DataFrame( + {"left": n_starts + 0.6, "right": n_stops + 0.4, "top": 2.5, "bottom": 0.5} + ) + fig.quad( + top="top", + bottom="bottom", + left="left", + right="right", + color="#cccccc", + source=df_n_runs, + name="gaps", + ) + + # Plot SNPs. + color_pass = bokeh.palettes.Colorblind6[3] + color_fail = bokeh.palettes.Colorblind6[5] + data["left"] = data["pos"] - 0.4 + data["right"] = data["pos"] + 0.4 + data["bottom"] = np.where(data["is_seg"], 1.6, 0.6) + data["top"] = data["bottom"] + 0.8 + data["color"] = np.where(data[f"pass_{site_mask}"], color_pass, color_fail) + fig.quad( + top="top", + bottom="bottom", + left="left", + right="right", + color="color", + source=data, + name="snps", + ) + + # Tidy plot. 
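+ # Label the two y levels and format the x axis as genome positions.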
+ fig.yaxis.ticker = bokeh.models.FixedTicker( + ticks=[1, 2], + ) + fig.yaxis.major_label_overrides = { + 1: "Non-segregating", + 2: "Segregating", + } + fig.xaxis.axis_label = f"Contig {resolved_region.contig} position (bp)" + fig.xaxis.ticker = bokeh.models.AdaptiveTicker(min_interval=1) + fig.xaxis.minor_tick_line_color = None + fig.xaxis[0].formatter = bokeh.models.NumeralTickFormatter(format="0,0") + + if show: + bokeh.plotting.show(fig) + + return fig + + @check_types + @doc( + summary="Compute genome accessibility array.", + returns="An array of boolean values identifying accessible genome sites.", + ) + def is_accessible( + self, + region: base_params.single_region, + site_mask: base_params.site_mask = DEFAULT, + inline_array: base_params.inline_array = base_params.inline_array_default, + chunks: base_params.chunks = base_params.chunks_default, + ) -> np.ndarray: + resolved_region: Region = parse_single_region(self, region) + del region + + # Determine contig sequence length. + seq_length = self.genome_sequence(resolved_region).shape[0] + + # Set up output. + is_accessible = np.zeros(seq_length, dtype=bool) + + # Access SNP site positions. + pos = self.snp_sites(region=resolved_region, field="POS").compute() + if resolved_region.start: + offset = resolved_region.start + else: + offset = 1 + + # Access site filters. + filter_pass = self._site_filters_for_region( + region=resolved_region, + mask=site_mask, + field="filter_pass", + inline_array=inline_array, + chunks=chunks, + ).compute() + + # Assign values from site filters. + is_accessible[pos - offset] = filter_pass + + return is_accessible diff --git a/malariagen_data/anopheles.py b/malariagen_data/anopheles.py index e3ba4abcf..72f5f4ed5 100644 --- a/malariagen_data/anopheles.py +++ b/malariagen_data/anopheles.py @@ -2,7 +2,6 @@ from abc import abstractmethod from collections import Counter from itertools import cycle -from pathlib import Path from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union import allel @@ -20,8 +19,6 @@ import xarray as xr import zarr from numpydoc_decorator import doc -from tqdm.auto import tqdm -from tqdm.dask import TqdmCallback from typing_extensions import Annotated, Literal, TypeAlias from . import veff @@ -29,6 +26,7 @@ from .anoph.genome_features import AnophelesGenomeFeaturesData, gplt_params from .anoph.genome_sequence import AnophelesGenomeSequenceData from .anoph.sample_metadata import AnophelesSampleMetadata, map_params +from .anoph.snp_data import AnophelesSnpData from .mjn import median_joining_network, mjn_graph from .util import ( DIM_ALLELE, @@ -37,18 +35,16 @@ DIM_VARIANT, CacheMiss, Region, - da_compress, + check_types, da_from_zarr, - dask_compress_dataset, - hash_params, init_zarr_store, jackknife_ci, jitter, locate_region, + parse_multi_region, + parse_single_region, plotly_discrete_legend, - resolve_region, simple_xarray_concat, - type_error, ) AA_CHANGE_QUERY = ( @@ -192,10 +188,14 @@ class frq_params: class het_params: """Parameters for functions related to heterozygosity and runs of homozygosity.""" - sample: TypeAlias = Annotated[ + single_sample: TypeAlias = Annotated[ Union[str, int], "Sample identifier or index within sample set.", ] + sample: TypeAlias = Annotated[ + Union[single_sample, List[single_sample], Tuple[single_sample, ...]], + "Sample identifier or index within sample set. 
Multiple values can also be provided as a list or tuple.", + ] window_size: TypeAlias = Annotated[ int, "Number of sites per window.", @@ -600,6 +600,7 @@ class dash_params: # work around pycharm failing to recognise that doc() is callable # noinspection PyCallingNonCallable class AnophelesDataResource( + AnophelesSnpData, AnophelesSampleMetadata, AnophelesGenomeFeaturesData, AnophelesGenomeSequenceData, @@ -614,9 +615,10 @@ def __init__( cohorts_analysis: Optional[str], aim_analysis: Optional[str], aim_metadata_dtype: Optional[Mapping[str, Any]], - site_filters_analysis, - bokeh_output_notebook, - results_cache, + site_filters_analysis: Optional[str], + default_site_mask: Optional[str], + bokeh_output_notebook: bool, + results_cache: Optional[str], log, debug, show_progress, @@ -626,7 +628,7 @@ def __init__( major_version_number: int, major_version_path: str, gff_gene_type: str, - gff_default_attributes: Tuple[str], + gff_default_attributes: Tuple[str, ...], storage_options: Mapping, # used by fsspec via init_filesystem(url, **kwargs) ): super().__init__( @@ -647,37 +649,19 @@ def __init__( cohorts_analysis=cohorts_analysis, aim_analysis=aim_analysis, aim_metadata_dtype=aim_metadata_dtype, + site_filters_analysis=site_filters_analysis, + default_site_mask=default_site_mask, + results_cache=results_cache, ) - # set up attributes - self._site_filters_analysis = site_filters_analysis - # set up caches # TODO review type annotations here, maybe can tighten - self._cache_site_filters: Dict = dict() - self._cache_snp_sites = None - self._cache_snp_genotypes: Dict = dict() self._cache_annotator = None self._cache_site_annotations = None self._cache_locate_site_class: Dict = dict() self._cache_haplotypes: Dict = dict() self._cache_haplotype_sites: Dict = dict() - if results_cache is not None: - results_cache = Path(results_cache).expanduser().resolve() - self._results_cache = results_cache - - # set analysis versions - - if site_filters_analysis is None: - self._site_filters_analysis = self.config.get( - "DEFAULT_SITE_FILTERS_ANALYSIS" - ) - else: - self._site_filters_analysis = site_filters_analysis - - # Start of @property - @property @abstractmethod def _pca_results_cache_name(self): @@ -728,35 +712,6 @@ def _ihs_gwss_cache_name(self): def _site_annotations_zarr_path(self): raise NotImplementedError("Must override _site_annotations_zarr_path") - # Start of @abstractmethod - - @property - @abstractmethod - def site_mask_ids(self): - """Identifiers for the different site masks that are available. - These are values than can be used for the `site_mask` parameter in any - method making using of SNP data. - - """ - raise NotImplementedError("Must override _site_mask_ids") - - @property - @abstractmethod - def _default_site_mask(self): - raise NotImplementedError("Must override _default_site_mask") - - def _prep_site_mask_param(self, *, site_mask): - if site_mask is None: - # allowed - pass - elif site_mask == DEFAULT: - site_mask = self._default_site_mask - elif site_mask not in self.site_mask_ids: - raise ValueError( - f"Invalid site mask, must be one of f{self.site_mask_ids}." - ) - return site_mask - @abstractmethod def _transcript_to_gene_name(self, transcript): # children may have different manual overrides. 
@@ -796,103 +751,7 @@ def _prep_phasing_analysis_param(self, *, analysis): ) return analysis - def _results_cache_add_analysis_params(self, params): - # default implementation, can be overridden if additional analysis - # params are used - params["cohorts_analysis"] = self._cohorts_analysis - params["site_filters_analysis"] = self._site_filters_analysis - - def results_cache_get(self, *, name, params): - debug = self._log.debug - if self._results_cache is None: - raise CacheMiss - params = params.copy() - self._results_cache_add_analysis_params(params) - cache_key, _ = hash_params(params) - cache_path = self._results_cache / name / cache_key - results_path = cache_path / "results.npz" - if not results_path.exists(): - raise CacheMiss - results = np.load(results_path) - debug(f"loaded {name}/{cache_key}") - return results - - def results_cache_set(self, *, name, params, results): - debug = self._log.debug - if self._results_cache is None: - debug("no results cache has been configured, do nothing") - return - params = params.copy() - self._results_cache_add_analysis_params(params) - cache_key, params_json = hash_params(params) - cache_path = self._results_cache / name / cache_key - cache_path.mkdir(exist_ok=True, parents=True) - params_path = cache_path / "params.json" - results_path = cache_path / "results.npz" - with params_path.open(mode="w") as f: - f.write(params_json) - np.savez_compressed(results_path, **results) - debug(f"saved {name}/{cache_key}") - - @doc( - summary=""" - Compute SNP allele counts. This returns the number of times each - SNP allele was observed in the selected samples. - """, - returns=""" - A numpy array of shape (n_variants, 4), where the first column has - the reference allele (0) counts, the second column has the first - alternate allele (1) counts, the third column has the second - alternate allele (2) counts, and the fourth column has the third - alternate allele (3) counts. - """, - notes=""" - This computation may take some time to run, depending on your - computing environment. Results of this computation will be cached - and re-used if the `results_cache` parameter was set when - instantiating the class. 
- """, - ) - def snp_allele_counts( - self, - region: base_params.region, - sample_sets: Optional[base_params.sample_sets] = None, - sample_query: Optional[base_params.sample_query] = None, - site_mask: Optional[base_params.site_mask] = None, - site_class: Optional[base_params.site_class] = None, - cohort_size: Optional[base_params.cohort_size] = None, - random_seed: base_params.random_seed = 42, - ) -> np.ndarray: - # change this name if you ever change the behaviour of this function, - # to invalidate any previously cached data - name = self._snp_allele_counts_results_cache_name - - # normalize params for consistent hash value - sample_sets, sample_query = self._prep_sample_selection_cache_params( - sample_sets=sample_sets, sample_query=sample_query - ) - region = self._prep_region_cache_param(region=region) - site_mask = self._prep_site_mask_param(site_mask=site_mask) - params = dict( - region=region, - sample_sets=sample_sets, - sample_query=sample_query, - site_mask=site_mask, - site_class=site_class, - cohort_size=cohort_size, - random_seed=random_seed, - ) - - try: - results = self.results_cache_get(name=name, params=params) - - except CacheMiss: - results = self._snp_allele_counts(**params) - self.results_cache_set(name=name, params=params, results=results) - - ac = results["ac"] - return ac - + @check_types @doc( summary=""" Group samples by taxon, area (space) and period (time), then compute @@ -1384,364 +1243,6 @@ def _roh_hmm_predict( ] ] - # Start of undecorated functions - - @doc( - summary="Open site filters zarr.", - returns="Zarr hierarchy.", - ) - def open_site_filters(self, mask: base_params.site_mask) -> zarr.hierarchy.Group: - try: - return self._cache_site_filters[mask] - except KeyError: - path = f"{self._base_path}/{self._major_version_path}/site_filters/{self._site_filters_analysis}/{mask}/" - store = init_zarr_store(fs=self._fs, path=path) - root = zarr.open_consolidated(store=store) - self._cache_site_filters[mask] = root - return root - - @doc( - summary="Open SNP sites zarr", - returns="Zarr hierarchy.", - ) - def open_snp_sites(self) -> zarr.hierarchy.Group: - if self._cache_snp_sites is None: - path = ( - f"{self._base_path}/{self._major_version_path}/snp_genotypes/all/sites/" - ) - store = init_zarr_store(fs=self._fs, path=path) - root = zarr.open_consolidated(store=store) - self._cache_snp_sites = root - return self._cache_snp_sites - - def _progress(self, iterable, **kwargs): - # progress doesn't mix well with debug logging - disable = self._debug or not self._show_progress - return tqdm(iterable, disable=disable, **kwargs) - - def _site_filters( - self, - *, - region, - mask, - field, - inline_array, - chunks, - ): - assert isinstance(region, Region) - root = self.open_site_filters(mask=mask) - z = root[f"{region.contig}/variants/{field}"] - d = da_from_zarr(z, inline_array=inline_array, chunks=chunks) - if region.start or region.end: - root = self.open_snp_sites() - pos = root[f"{region.contig}/variants/POS"][:] - loc_region = locate_region(region, pos) - d = d[loc_region] - return d - - @doc( - summary="Access SNP site filters.", - returns=""" - An array of boolean values identifying sites that pass the filters. 
- """, - ) - def site_filters( - self, - region: base_params.region, - mask: base_params.site_mask, - field: base_params.field = "filter_pass", - inline_array: base_params.inline_array = base_params.inline_array_default, - chunks: base_params.chunks = base_params.chunks_default, - ) -> da.Array: - # resolve the region parameter to a standard type - resolved_region = self.resolve_region(region) - del region - if isinstance(resolved_region, Region): - resolved_region = [resolved_region] - - d = da.concatenate( - [ - self._site_filters( - region=r, - mask=mask, - field=field, - inline_array=inline_array, - chunks=chunks, - ) - for r in resolved_region - ] - ) - - return d - - def _snp_sites_for_contig(self, contig, *, field, inline_array, chunks): - """Access SNP sites data for a single contig.""" - root = self.open_snp_sites() - z = root[f"{contig}/variants/{field}"] - ret = da_from_zarr(z, inline_array=inline_array, chunks=chunks) - return ret - - def _snp_sites( - self, - *, - region, - field, - inline_array, - chunks, - ): - assert isinstance(region, Region), type(region) - - ret = self._snp_sites_for_contig( - contig=region.contig, field=field, inline_array=inline_array, chunks=chunks - ) - - if region.start or region.end: - if field == "POS": - pos = ret - else: - pos = self._snp_sites_for_contig( - contig=region.contig, - field="POS", - inline_array=inline_array, - chunks=chunks, - ) - loc_region = locate_region(region, pos) - ret = ret[loc_region] - return ret - - @doc( - summary="Access SNP site data (positions and alleles).", - returns=""" - An array of either SNP positions ("POS"), reference alleles ("REF") or - alternate alleles ("ALT"). - """, - ) - def snp_sites( - self, - region: base_params.region, - field: base_params.field, - site_mask: Optional[base_params.site_mask] = None, - inline_array: base_params.inline_array = base_params.inline_array_default, - chunks: base_params.chunks = base_params.chunks_default, - ) -> da.Array: - debug = self._log.debug - - # resolve the region parameter to a standard type - resolved_region = self.resolve_region(region) - del region - if isinstance(resolved_region, Region): - resolved_region = [resolved_region] - - debug("access SNP sites and concatenate over regions") - ret = da.concatenate( - [ - self._snp_sites( - region=r, - field=field, - chunks=chunks, - inline_array=inline_array, - ) - for r in resolved_region - ], - axis=0, - ) - - debug("apply site mask if requested") - if site_mask is not None: - loc_sites = self.site_filters( - region=resolved_region, - mask=site_mask, - chunks=chunks, - inline_array=inline_array, - ) - ret = da_compress(loc_sites, ret, axis=0) - - return ret - - @doc( - summary="Convert a genome region into a standard data structure.", - returns="An instance of the `Region` class.", - ) - def resolve_region(self, region: base_params.region) -> Region: - return resolve_region(self, region) - - def _prep_region_cache_param(self, *, region): - """Obtain a normalised representation of a region parameter which can - be used with the results cache.""" - - # N.B., we need to convert to a dict, because cache saves params as - # JSON - - region = self.resolve_region(region) - if isinstance(region, list): - region = [r.to_dict() for r in region] - else: - region = region.to_dict() - return region - - def _prep_sample_selection_cache_params(self, *, sample_sets, sample_query): - # normalise sample sets - sample_sets = self._prep_sample_sets_param(sample_sets=sample_sets) - - # normalize sample_query - if 
isinstance(sample_query, str): - # resolve query to a list of integers for more cache hits - we - # do this because there are different ways to write the same pandas - # query, and so it's better to evaluate the query and use a list of - # integer indices instead - df_samples = self.sample_metadata(sample_sets=sample_sets) - loc_samples = df_samples.eval(sample_query).values - sample_query = np.nonzero(loc_samples)[0].tolist() - - return sample_sets, sample_query - - @doc( - summary="Open SNP genotypes zarr for a given sample set.", - returns="Zarr hierarchy.", - ) - def open_snp_genotypes( - self, sample_set: base_params.sample_set - ) -> zarr.hierarchy.Group: - try: - return self._cache_snp_genotypes[sample_set] - except KeyError: - release = self.lookup_release(sample_set=sample_set) - release_path = self._release_to_path(release) - path = f"{self._base_path}/{release_path}/snp_genotypes/all/{sample_set}/" - store = init_zarr_store(fs=self._fs, path=path) - root = zarr.open_consolidated(store=store) - self._cache_snp_genotypes[sample_set] = root - return root - - def _snp_genotypes_for_contig( - self, - *, - contig: str, - sample_set: str, - field: str, - inline_array: bool, - chunks: str, - ) -> da.Array: - """Access SNP genotypes for a single contig and a single sample set.""" - assert isinstance(contig, str) - assert isinstance(sample_set, str) - root = self.open_snp_genotypes(sample_set=sample_set) - z = root[f"{contig}/calldata/{field}"] - d = da_from_zarr(z, inline_array=inline_array, chunks=chunks) - - return d - - @doc( - summary="Access SNP genotypes and associated data.", - returns=""" - An array of either genotypes (GT), genotype quality (GQ), allele - depths (AD) or mapping quality (MQ) values. - """, - ) - def snp_genotypes( - self, - region: base_params.region, - sample_sets: Optional[base_params.sample_sets] = None, - sample_query: Optional[base_params.sample_query] = None, - field: base_params.field = "GT", - site_mask: Optional[base_params.site_mask] = None, - inline_array: base_params.inline_array = base_params.inline_array_default, - chunks: base_params.chunks = base_params.chunks_default, - ) -> da.Array: - debug = self._log.debug - - debug("normalise parameters") - sample_sets = self._prep_sample_sets_param(sample_sets=sample_sets) - resolved_region = self.resolve_region(region) - del region - - debug("normalise region to list to simplify concatenation logic") - if isinstance(resolved_region, Region): - resolved_region = [resolved_region] - - debug("concatenate multiple sample sets and/or contigs") - lx = [] - for r in resolved_region: - ly = [] - - for s in sample_sets: - y = self._snp_genotypes_for_contig( - contig=r.contig, - sample_set=s, - field=field, - inline_array=inline_array, - chunks=chunks, - ) - ly.append(y) - - debug("concatenate data from multiple sample sets") - x = da.concatenate(ly, axis=1) - - debug("locate region - do this only once, optimisation") - if r.start or r.end: - pos = self.snp_sites(region=r.contig, field="POS") - loc_region = locate_region(r, pos) - x = x[loc_region] - - lx.append(x) - - debug("concatenate data from multiple regions") - d = da.concatenate(lx, axis=0) - - debug("apply site filters if requested") - if site_mask is not None: - loc_sites = self.site_filters( - region=resolved_region, - mask=site_mask, - ) - d = da_compress(loc_sites, d, axis=0) - - debug("apply sample query if requested") - if sample_query is not None: - df_samples = self.sample_metadata(sample_sets=sample_sets) - loc_samples = 
df_samples.eval(sample_query).values - d = da.compress(loc_samples, d, axis=1) - - return d - - @doc( - summary="Compute genome accessibility array.", - returns="An array of boolean values identifying accessible genome sites.", - ) - def is_accessible( - self, - region: base_params.region, - site_mask: base_params.site_mask = DEFAULT, - ) -> np.ndarray: - debug = self._log.debug - - debug("resolve region") - resolved_region = self.resolve_region(region) - del region - - debug("determine contig sequence length") - seq_length = self.genome_sequence(resolved_region).shape[0] - - debug("set up output") - is_accessible = np.zeros(seq_length, dtype=bool) - - pos = self.snp_sites(region=resolved_region, field="POS").compute() - if resolved_region.start: - offset = resolved_region.start - else: - offset = 1 - - debug("access site filters") - filter_pass = self.site_filters( - region=resolved_region, - mask=site_mask, - ).compute() - - debug("assign values from site filters") - is_accessible[pos - offset] = filter_pass - - return is_accessible - def _snp_df(self, *, transcript: str) -> Tuple[Region, pd.DataFrame]: """Set up a dataframe with SNP site and filter columns.""" debug = self._log.debug @@ -1799,6 +1300,7 @@ def _annotator(self): ) return self._cache_annotator + @check_types @doc( summary="Compute variant effects for a gene transcript.", returns=""" @@ -1816,328 +1318,38 @@ def snp_effects( debug("setup initial dataframe of SNPs") _, df_snps = self._snp_df(transcript=transcript) - debug("setup variant effect annotator") - ann = self._annotator() - - debug("apply mask if requested") - if site_mask is not None: - loc_sites = df_snps[f"pass_{site_mask}"] - df_snps = df_snps.loc[loc_sites] - - debug("reset index after filtering") - df_snps.reset_index(inplace=True, drop=True) - - debug("add effects to the dataframe") - ann.get_effects(transcript=transcript, variants=df_snps) - - return df_snps - - def _snp_variants_for_contig(self, *, contig, inline_array, chunks): - debug = self._log.debug - - coords = dict() - data_vars = dict() - - debug("variant arrays") - sites_root = self.open_snp_sites() - - debug("variant_position") - pos_z = sites_root[f"{contig}/variants/POS"] - variant_position = da_from_zarr(pos_z, inline_array=inline_array, chunks=chunks) - coords["variant_position"] = [DIM_VARIANT], variant_position - - debug("variant_allele") - ref_z = sites_root[f"{contig}/variants/REF"] - alt_z = sites_root[f"{contig}/variants/ALT"] - ref = da_from_zarr(ref_z, inline_array=inline_array, chunks=chunks) - alt = da_from_zarr(alt_z, inline_array=inline_array, chunks=chunks) - variant_allele = da.concatenate([ref[:, None], alt], axis=1) - data_vars["variant_allele"] = [DIM_VARIANT, DIM_ALLELE], variant_allele - - debug("variant_contig") - contig_index = self.contigs.index(contig) - variant_contig = da.full_like( - variant_position, fill_value=contig_index, dtype="u1" - ) - coords["variant_contig"] = [DIM_VARIANT], variant_contig - - debug("site filters arrays") - for mask in self.site_mask_ids: - filters_root = self.open_site_filters(mask=mask) - z = filters_root[f"{contig}/variants/filter_pass"] - d = da_from_zarr(z, inline_array=inline_array, chunks=chunks) - data_vars[f"variant_filter_pass_{mask}"] = [DIM_VARIANT], d - - debug("set up attributes") - attrs = {"contigs": self.contigs} - - debug("create a dataset") - ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) - - return ds - - def _snp_calls_for_contig(self, *, contig, sample_set, inline_array, chunks): - debug = 
self._log.debug - - coords = dict() - data_vars = dict() - - debug("call arrays") - calls_root = self.open_snp_genotypes(sample_set=sample_set) - gt_z = calls_root[f"{contig}/calldata/GT"] - call_genotype = da_from_zarr(gt_z, inline_array=inline_array, chunks=chunks) - gq_z = calls_root[f"{contig}/calldata/GQ"] - call_gq = da_from_zarr(gq_z, inline_array=inline_array, chunks=chunks) - ad_z = calls_root[f"{contig}/calldata/AD"] - call_ad = da_from_zarr(ad_z, inline_array=inline_array, chunks=chunks) - mq_z = calls_root[f"{contig}/calldata/MQ"] - call_mq = da_from_zarr(mq_z, inline_array=inline_array, chunks=chunks) - data_vars["call_genotype"] = ( - [DIM_VARIANT, DIM_SAMPLE, DIM_PLOIDY], - call_genotype, - ) - data_vars["call_GQ"] = ([DIM_VARIANT, DIM_SAMPLE], call_gq) - data_vars["call_MQ"] = ([DIM_VARIANT, DIM_SAMPLE], call_mq) - data_vars["call_AD"] = ( - [DIM_VARIANT, DIM_SAMPLE, DIM_ALLELE], - call_ad, - ) - - debug("sample arrays") - z = calls_root["samples"] - sample_id = da_from_zarr(z, inline_array=inline_array, chunks=chunks) - # decode to str, as it is stored as bytes objects - sample_id = sample_id.astype("U") - coords["sample_id"] = [DIM_SAMPLE], sample_id - - debug("create a dataset") - ds = xr.Dataset(data_vars=data_vars, coords=coords) - - return ds - - @doc( - summary="Access SNP sites, site filters and genotype calls.", - returns="A dataset containing SNP sites, site filters and genotype calls.", - ) - def snp_calls( - self, - region: base_params.region, - sample_sets: Optional[base_params.sample_sets] = None, - sample_query: Optional[base_params.sample_query] = None, - site_mask: Optional[base_params.site_mask] = None, - site_class: Optional[base_params.site_class] = None, - inline_array: base_params.inline_array = base_params.inline_array_default, - chunks: base_params.chunks = base_params.chunks_default, - cohort_size: Optional[base_params.cohort_size] = None, - min_cohort_size: Optional[base_params.min_cohort_size] = None, - max_cohort_size: Optional[base_params.max_cohort_size] = None, - random_seed: base_params.random_seed = 42, - ) -> xr.Dataset: - debug = self._log.debug - - debug("normalise parameters") - sample_sets = self._prep_sample_sets_param(sample_sets=sample_sets) - resolved_region = self.resolve_region(region) - del region - if isinstance(resolved_region, Region): - resolved_region = [resolved_region] - - debug("access SNP calls and concatenate multiple sample sets and/or regions") - lx = [] - for r in resolved_region: - ly = [] - for s in sample_sets: - y = self._snp_calls_for_contig( - contig=r.contig, - sample_set=s, - inline_array=inline_array, - chunks=chunks, - ) - ly.append(y) - - debug("concatenate data from multiple sample sets") - x = simple_xarray_concat(ly, dim=DIM_SAMPLE) - - debug("add variants variables") - v = self._snp_variants_for_contig( - contig=r.contig, inline_array=inline_array, chunks=chunks - ) - x = xr.merge([v, x], compat="override", join="override") - - debug("handle site class") - if site_class is not None: - loc_ann = self._locate_site_class( - region=r.contig, - site_class=site_class, - site_mask=None, - ) - x = x.isel(variants=loc_ann) - - debug("handle region, do this only once - optimisation") - if r.start or r.end: - pos = x["variant_position"].values - loc_region = locate_region(r, pos) - x = x.isel(variants=loc_region) - - lx.append(x) - - debug("concatenate data from multiple regions") - ds = simple_xarray_concat(lx, dim=DIM_VARIANT) - - if site_mask is not None: - debug("apply site filters") - ds = 
dask_compress_dataset( - ds, indexer=f"variant_filter_pass_{site_mask}", dim=DIM_VARIANT - ) - - debug("add call_genotype_mask") - ds["call_genotype_mask"] = ds["call_genotype"] < 0 - - if sample_query is not None: - debug("handle sample query") - if isinstance(sample_query, str): - df_samples = self.sample_metadata(sample_sets=sample_sets) - loc_samples = df_samples.eval(sample_query).values - if np.count_nonzero(loc_samples) == 0: - raise ValueError(f"No samples found for query {sample_query!r}") - else: - # assume sample query is an indexer, e.g., a list of integers - loc_samples = sample_query - ds = ds.isel(samples=loc_samples) - - if cohort_size is not None: - debug("handle cohort size") - # overrides min and max - min_cohort_size = cohort_size - max_cohort_size = cohort_size - - if min_cohort_size is not None: - debug("handle min cohort size") - n_samples = ds.dims["samples"] - if n_samples < min_cohort_size: - raise ValueError( - f"not enough samples ({n_samples}) for minimum cohort size ({min_cohort_size})" - ) - - if max_cohort_size is not None: - debug("handle max cohort size") - n_samples = ds.dims["samples"] - if n_samples > max_cohort_size: - rng = np.random.default_rng(seed=random_seed) - loc_downsample = rng.choice( - n_samples, size=max_cohort_size, replace=False - ) - loc_downsample.sort() - ds = ds.isel(samples=loc_downsample) - - return ds - - def snp_dataset(self, *args, **kwargs): - """Deprecated, this method has been renamed to snp_calls().""" - return self.snp_calls(*args, **kwargs) - - def _snp_allele_counts( - self, - *, - region, - sample_sets, - sample_query, - site_mask, - site_class, - cohort_size, - random_seed, - ): - debug = self._log.debug - - debug("access SNP calls") - ds_snps = self.snp_calls( - region=region, - sample_sets=sample_sets, - sample_query=sample_query, - site_mask=site_mask, - site_class=site_class, - cohort_size=cohort_size, - random_seed=random_seed, - ) - gt = ds_snps["call_genotype"] - - debug("set up and run allele counts computation") - gt = allel.GenotypeDaskArray(gt.data) - ac = gt.count_alleles(max_allele=3) - with self._dask_progress(desc="Compute SNP allele counts"): - ac = ac.compute() - - debug("return plain numpy array") - results = dict(ac=ac.values) - - return results - - def _dask_progress(self, **kwargs): - disable = not self._show_progress - return TqdmCallback(disable=disable, **kwargs) - - @doc( - summary="Access SNP sites and site filters.", - returns="A dataset containing SNP sites and site filters.", - ) - def snp_variants( - self, - region: base_params.region, - site_mask: Optional[base_params.site_mask] = None, - inline_array: base_params.inline_array = base_params.inline_array_default, - chunks: base_params.chunks = base_params.chunks_default, - ): - debug = self._log.debug - - debug("normalise parameters") - resolved_region = self.resolve_region(region) - del region - if isinstance(resolved_region, Region): - resolved_region = [resolved_region] - - debug("access SNP data and concatenate multiple regions") - lx = [] - for r in resolved_region: - debug("access variants") - x = self._snp_variants_for_contig( - contig=r.contig, - inline_array=inline_array, - chunks=chunks, - ) - - debug("handle region") - if r.start or r.end: - pos = x["variant_position"].values - loc_region = locate_region(r, pos) - x = x.isel(variants=loc_region) - - lx.append(x) - - debug("concatenate data from multiple regions") - ds = simple_xarray_concat(lx, dim=DIM_VARIANT) + debug("setup variant effect annotator") + ann = self._annotator() - 
debug("apply site filters") + debug("apply mask if requested") if site_mask is not None: - ds = dask_compress_dataset( - ds, indexer=f"variant_filter_pass_{site_mask}", dim=DIM_VARIANT - ) + loc_sites = df_snps[f"pass_{site_mask}"] + df_snps = df_snps.loc[loc_sites] - return ds + debug("reset index after filtering") + df_snps.reset_index(inplace=True, drop=True) + + debug("add effects to the dataframe") + ann.get_effects(transcript=transcript, variants=df_snps) + + return df_snps + @check_types @doc( - summary="", + summary="Create an IGV browser and inject into the current notebook.", parameters=dict( tracks="Configuration for any additional tracks.", ), returns="IGV browser.", ) def igv( - self, region: base_params.region, tracks: Optional[List] = None + self, region: base_params.single_region, tracks: Optional[List] = None ) -> igv_notebook.Browser: debug = self._log.debug debug("resolve region") - region = self.resolve_region(region) + region_prepped: Region = parse_single_region(self, region) + del region config = { "reference": { @@ -2155,7 +1367,7 @@ def igv( } ], }, - "locus": str(region), + "locus": str(region_prepped), } if tracks: config["tracks"] = tracks @@ -2167,6 +1379,7 @@ def igv( return browser + @check_types @doc( summary=""" Launch IGV and view sequence read alignments and SNP genotypes from @@ -2182,7 +1395,7 @@ def igv( ) def view_alignments( self, - region: base_params.region, + region: base_params.single_region, sample: str, visibility_window: int = 20_000, ): @@ -2203,7 +1416,7 @@ def view_alignments( debug(vcf_url) debug("parse region") - resolved_region = self.resolve_region(region) + resolved_region: Region = parse_single_region(self, region) del region contig = resolved_region.contig @@ -2252,7 +1465,7 @@ def _pca( n_snps, thin_offset, sample_sets, - sample_query, + sample_indices, site_mask, min_minor_ac, max_missing_an, @@ -2264,7 +1477,7 @@ def _pca( ds_snps = self.snp_calls( region=region, sample_sets=sample_sets, - sample_query=sample_query, + sample_indices=sample_indices, site_mask=site_mask, ) debug( @@ -2275,7 +1488,7 @@ def _pca( ac = self.snp_allele_counts( region=region, sample_sets=sample_sets, - sample_query=sample_query, + sample_indices=sample_indices, site_mask=site_mask, ) n_chroms = ds_snps.dims["samples"] * 2 @@ -2325,6 +1538,7 @@ def _pca( results = dict(coords=coords, evr=model.explained_variance_ratio_) return results + @check_types @doc( summary=""" Plot explained variance ratios from a principal components analysis @@ -2403,39 +1617,18 @@ def _prep_samples_for_cohort_grouping(self, *, df_samples, area_by, period_by): return df_samples - def _region_str(self, region): - """Convert a region to a string representation. - - Parameters - ---------- - region : Region or list of Region - The region to display. 
- - Returns - ------- - out : str - - """ - if isinstance(region, list): - if len(region) > 1: - return "; ".join([self._region_str(r) for r in region]) - else: - region = region[0] - - # sanity check - assert isinstance(region, Region) - - return str(region) - - def _lookup_sample(self, sample, sample_set=None): + def _lookup_sample( + self, + sample: het_params.single_sample, + sample_set: Optional[base_params.sample_set] = None, + ): df_samples = self.sample_metadata(sample_sets=sample_set).set_index("sample_id") sample_rec = None if isinstance(sample, str): sample_rec = df_samples.loc[sample] - elif isinstance(sample, int): - sample_rec = df_samples.iloc[sample] else: - type_error(name="sample", value=sample, expectation=(str, int)) + assert isinstance(sample, int) + sample_rec = df_samples.iloc[sample] return sample_rec def _plot_heterozygosity_track( @@ -2445,7 +1638,7 @@ def _plot_heterozygosity_track( sample_set, windows, counts, - region, + region: Region, window_size, y_max, sizing_mode, @@ -2457,8 +1650,6 @@ def _plot_heterozygosity_track( ): debug = self._log.debug - region = self.resolve_region(region) - # pos axis window_pos = windows.mean(axis=1) @@ -2515,13 +1706,14 @@ def _plot_heterozygosity_track( return fig + @check_types @doc( summary="Plot windowed heterozygosity for a single sample over a genome region.", ) def plot_heterozygosity_track( self, - sample: het_params.sample, - region: base_params.region, + sample: het_params.single_sample, + region: base_params.single_region, window_size: het_params.window_size = het_params.window_size_default, y_max: het_params.y_max = het_params.y_max_default, circle_kwargs: Optional[het_params.circle_kwargs] = None, @@ -2535,10 +1727,14 @@ def plot_heterozygosity_track( ) -> gplt_params.figure: debug = self._log.debug + # Normalise parameters. 
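+ # Heterozygosity is plotted over a single genome region, so resolve
+ # the region parameter to a single Region instance up front.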
+ region_prepped: Region = parse_single_region(self, region) + del region + debug("compute windowed heterozygosity") sample_id, sample_set, windows, counts = self._sample_count_het( sample=sample, - region=region, + region=region_prepped, site_mask=site_mask, window_size=window_size, sample_set=sample_set, @@ -2550,7 +1746,7 @@ def plot_heterozygosity_track( sample_set=sample_set, windows=windows, counts=counts, - region=region, + region=region_prepped, window_size=window_size, y_max=y_max, sizing_mode=sizing_mode, @@ -2563,6 +1759,7 @@ def plot_heterozygosity_track( return fig + @check_types @doc( summary="Plot windowed heterozygosity for a single sample over a genome region.", returns="Bokeh figure.", @@ -2570,7 +1767,7 @@ def plot_heterozygosity_track( def plot_heterozygosity( self, sample: het_params.sample, - region: base_params.region, + region: base_params.single_region, window_size: het_params.window_size = het_params.window_size_default, y_max: het_params.y_max = het_params.y_max_default, circle_kwargs: Optional[het_params.circle_kwargs] = None, @@ -2653,15 +1850,14 @@ def plot_heterozygosity( def _sample_count_het( self, - sample, - region, - site_mask, - window_size, - sample_set=None, + sample: het_params.single_sample, + region: Region, + site_mask: base_params.site_mask, + window_size: het_params.window_size, + sample_set: Optional[base_params.sample_set] = None, ): debug = self._log.debug - region = self.resolve_region(region) site_mask = self._prep_site_mask_param(site_mask=site_mask) debug("access sample metadata, look up sample") @@ -2701,13 +1897,14 @@ def _sample_count_het( return sample_id, sample_set, windows, counts + @check_types @doc( summary="Infer runs of homozygosity for a single sample over a genome region.", ) def roh_hmm( self, - sample: het_params.sample, - region: base_params.region, + sample: het_params.single_sample, + region: base_params.single_region, window_size: het_params.window_size = het_params.window_size_default, site_mask: base_params.site_mask = DEFAULT, sample_set: Optional[base_params.sample_set] = None, @@ -2717,7 +1914,7 @@ def roh_hmm( ) -> het_params.df_roh: debug = self._log.debug - resolved_region = self.resolve_region(region) + resolved_region: Region = parse_single_region(self, region) del region debug("compute windowed heterozygosity") @@ -2743,13 +1940,14 @@ def roh_hmm( return df_roh + @check_types @doc( summary="Plot a runs of homozygosity track.", ) def plot_roh_track( self, df_roh: het_params.df_roh, - region: base_params.region, + region: base_params.single_region, sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, height: gplt_params.height = 100, @@ -2760,7 +1958,7 @@ def plot_roh_track( debug = self._log.debug debug("handle region parameter - this determines the genome region to plot") - resolved_region = self.resolve_region(region) + resolved_region: Region = parse_single_region(self, region) del region contig = resolved_region.contig start = resolved_region.start @@ -2826,6 +2024,7 @@ def plot_roh_track( return fig + @check_types @doc( summary=""" Plot windowed heterozygosity and inferred runs of homozygosity for a @@ -2834,8 +2033,8 @@ def plot_roh_track( ) def plot_roh( self, - sample: het_params.sample, - region: base_params.region, + sample: het_params.single_sample, + region: base_params.single_region, window_size: het_params.window_size = het_params.window_size_default, site_mask: base_params.site_mask = DEFAULT, sample_set: 
Optional[base_params.sample_set] = None, @@ -2853,7 +2052,7 @@ def plot_roh( ) -> gplt_params.figure: debug = self._log.debug - resolved_region = self.resolve_region(region) + resolved_region: Region = parse_single_region(self, region) del region debug("compute windowed heterozygosity") @@ -2934,215 +2133,7 @@ def plot_roh( return fig_all - def _locate_site_class( - self, - *, - region, - site_mask, - site_class, - ): - debug = self._log.debug - - # cache these data in memory to avoid repeated computation - cache_key = (region, site_mask, site_class) - - try: - loc_ann = self._cache_locate_site_class[cache_key] - - except KeyError: - debug("access site annotations data") - ds_ann = self.site_annotations( - region=region, - site_mask=site_mask, - ) - codon_pos = ds_ann["codon_position"].data - codon_deg = ds_ann["codon_degeneracy"].data - seq_cls = ds_ann["seq_cls"].data - seq_flen = ds_ann["seq_flen"].data - seq_relpos_start = ds_ann["seq_relpos_start"].data - seq_relpos_stop = ds_ann["seq_relpos_stop"].data - site_class = site_class.upper() - - debug("define constants used in site annotations data") - # FIXME: variable in function should be lowercase - SEQ_CLS_UNKNOWN = 0 # noqa - SEQ_CLS_UPSTREAM = 1 - SEQ_CLS_DOWNSTREAM = 2 - SEQ_CLS_5UTR = 3 - SEQ_CLS_3UTR = 4 - SEQ_CLS_CDS_FIRST = 5 - SEQ_CLS_CDS_MID = 6 - SEQ_CLS_CDS_LAST = 7 - SEQ_CLS_INTRON_FIRST = 8 - SEQ_CLS_INTRON_MID = 9 - SEQ_CLS_INTRON_LAST = 10 - CODON_DEG_UNKNOWN = 0 # noqa - CODON_DEG_0 = 1 - CODON_DEG_2_SIMPLE = 2 - CODON_DEG_2_COMPLEX = 3 # noqa - CODON_DEG_4 = 4 - - debug("set up site selection") - - if site_class == "CDS_DEG_4": - # 4-fold degenerate coding sites - loc_ann = ( - ( - (seq_cls == SEQ_CLS_CDS_FIRST) - | (seq_cls == SEQ_CLS_CDS_MID) - | (seq_cls == SEQ_CLS_CDS_LAST) - ) - & (codon_pos == 2) - & (codon_deg == CODON_DEG_4) - ) - - elif site_class == "CDS_DEG_2_SIMPLE": - # 2-fold degenerate coding sites - loc_ann = ( - ( - (seq_cls == SEQ_CLS_CDS_FIRST) - | (seq_cls == SEQ_CLS_CDS_MID) - | (seq_cls == SEQ_CLS_CDS_LAST) - ) - & (codon_pos == 2) - & (codon_deg == CODON_DEG_2_SIMPLE) - ) - - elif site_class == "CDS_DEG_0": - # non-degenerate coding sites - loc_ann = ( - (seq_cls == SEQ_CLS_CDS_FIRST) - | (seq_cls == SEQ_CLS_CDS_MID) - | (seq_cls == SEQ_CLS_CDS_LAST) - ) & (codon_deg == CODON_DEG_0) - - elif site_class == "INTRON_SHORT": - # short introns, excluding splice regions - loc_ann = ( - ( - (seq_cls == SEQ_CLS_INTRON_FIRST) - | (seq_cls == SEQ_CLS_INTRON_MID) - | (seq_cls == SEQ_CLS_INTRON_LAST) - ) - & (seq_flen < 100) - & (seq_relpos_start > 10) - & (seq_relpos_stop > 10) - ) - - elif site_class == "INTRON_LONG": - # long introns, excluding splice regions - loc_ann = ( - ( - (seq_cls == SEQ_CLS_INTRON_FIRST) - | (seq_cls == SEQ_CLS_INTRON_MID) - | (seq_cls == SEQ_CLS_INTRON_LAST) - ) - & (seq_flen > 200) - & (seq_relpos_start > 10) - & (seq_relpos_stop > 10) - ) - - elif site_class == "INTRON_SPLICE_5PRIME": - # 5' intron splice regions - loc_ann = ( - (seq_cls == SEQ_CLS_INTRON_FIRST) - | (seq_cls == SEQ_CLS_INTRON_MID) - | (seq_cls == SEQ_CLS_INTRON_LAST) - ) & (seq_relpos_start < 2) - - elif site_class == "INTRON_SPLICE_3PRIME": - # 3' intron splice regions - loc_ann = ( - (seq_cls == SEQ_CLS_INTRON_FIRST) - | (seq_cls == SEQ_CLS_INTRON_MID) - | (seq_cls == SEQ_CLS_INTRON_LAST) - ) & (seq_relpos_stop < 2) - - elif site_class == "UTR_5PRIME": - # 5' UTR - loc_ann = seq_cls == SEQ_CLS_5UTR - - elif site_class == "UTR_3PRIME": - # 3' UTR - loc_ann = seq_cls == SEQ_CLS_3UTR - - elif site_class == 
"INTERGENIC": - # intergenic regions, distant from a gene - loc_ann = ( - (seq_cls == SEQ_CLS_UPSTREAM) & (seq_relpos_stop > 10_000) - ) | ((seq_cls == SEQ_CLS_DOWNSTREAM) & (seq_relpos_start > 10_000)) - - else: - raise NotImplementedError(site_class) - - debug("compute site selection") - with self._dask_progress(desc=f"Locate {site_class} sites"): - loc_ann = loc_ann.compute() - - self._cache_locate_site_class[cache_key] = loc_ann - - return loc_ann - - @doc( - summary="Load site annotations.", - returns="A dataset of site annotations.", - ) - def site_annotations( - self, - region: base_params.region, - site_mask: Optional[base_params.site_mask] = None, - inline_array: base_params.inline_array = base_params.inline_array_default, - chunks: base_params.chunks = base_params.chunks_default, - ) -> xr.Dataset: - # N.B., we default to chunks="auto" here for performance reasons - - debug = self._log.debug - - debug("resolve region") - resolved_region = self.resolve_region(region) - del region - if isinstance(resolved_region, list): - raise TypeError("Multiple regions not supported.") - contig = resolved_region.contig - - debug("open site annotations zarr") - root = self.open_site_annotations() - - debug("build a dataset") - ds = xr.Dataset() - for field in ( - "codon_degeneracy", - "codon_nonsyn", - "codon_position", - "seq_cls", - "seq_flen", - "seq_relpos_start", - "seq_relpos_stop", - ): - data = da_from_zarr( - root[field][contig], - inline_array=inline_array, - chunks=chunks, - ) - ds[field] = "variants", data - - debug("subset to SNP positions") - pos = self.snp_sites( - region=contig, - field="POS", - site_mask=site_mask, - inline_array=inline_array, - chunks=chunks, - ) - pos = pos.compute() - if resolved_region.start or resolved_region.end: - loc_region = locate_region(resolved_region, pos) - pos = pos[loc_region] - idx = pos - 1 - ds = ds.isel(variants=idx) - - return ds - + @check_types @doc( summary=""" Run a principal components analysis (PCA) using biallelic SNPs from @@ -3162,7 +2153,7 @@ def pca( thin_offset: pca_params.thin_offset = pca_params.thin_offset_default, sample_sets: Optional[base_params.sample_sets] = None, sample_query: Optional[base_params.sample_query] = None, - site_mask: base_params.site_mask = DEFAULT, + site_mask: Optional[base_params.site_mask] = DEFAULT, min_minor_ac: pca_params.min_minor_ac = pca_params.min_minor_ac_default, max_missing_an: pca_params.max_missing_an = pca_params.max_missing_an_default, n_components: pca_params.n_components = pca_params.n_components_default, @@ -3174,18 +2165,23 @@ def pca( name = self._pca_results_cache_name debug("normalize params for consistent hash value") - sample_sets, sample_query = self._prep_sample_selection_cache_params( - sample_sets=sample_sets, sample_query=sample_query + ( + sample_sets_prepped, + sample_indices_prepped, + ) = self._prep_sample_selection_cache_params( + sample_sets=sample_sets, + sample_query=sample_query, + sample_indices=None, ) - region = self._prep_region_cache_param(region=region) - site_mask = self._prep_site_mask_param(site_mask=site_mask) + region_prepped = self._prep_region_cache_param(region=region) + site_mask_prepped = self._prep_optional_site_mask_param(site_mask=site_mask) params = dict( - region=region, + region=region_prepped, n_snps=n_snps, thin_offset=thin_offset, - sample_sets=sample_sets, - sample_query=sample_query, - site_mask=site_mask, + sample_sets=sample_sets_prepped, + sample_indices=sample_indices_prepped, + site_mask=site_mask_prepped, min_minor_ac=min_minor_ac, 
max_missing_an=max_missing_an, n_components=n_components, @@ -3206,7 +2202,7 @@ def pca( debug("add coords to sample metadata dataframe") df_samples = self.sample_metadata( sample_sets=sample_sets, - sample_query=sample_query, + sample_indices=sample_indices_prepped, ) df_coords = pd.DataFrame( {f"PC{i + 1}": coords[:, i] for i in range(n_components)} @@ -3215,261 +2211,7 @@ def pca( return df_pca, evr - @doc( - summary=""" - Plot SNPs in a given genome region. SNPs are shown as rectangles, - with segregating and non-segregating SNPs positioned on different levels, - and coloured by site filter. - """, - parameters=dict( - max_snps="Maximum number of SNPs to show.", - ), - ) - def plot_snps( - self, - region: base_params.region, - sample_sets: Optional[base_params.sample_sets] = None, - sample_query: Optional[base_params.sample_query] = None, - site_mask: base_params.site_mask = DEFAULT, - cohort_size: Optional[base_params.cohort_size] = None, - sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, - width: gplt_params.width = gplt_params.width_default, - track_height: gplt_params.height = 80, - genes_height: gplt_params.genes_height = gplt_params.genes_height_default, - max_snps: int = 200_000, - show: gplt_params.show = True, - ) -> gplt_params.figure: - debug = self._log.debug - - debug("plot SNPs track") - fig1 = self.plot_snps_track( - region=region, - sample_sets=sample_sets, - sample_query=sample_query, - site_mask=site_mask, - cohort_size=cohort_size, - sizing_mode=sizing_mode, - width=width, - height=track_height, - max_snps=max_snps, - show=False, - ) - fig1.xaxis.visible = False - - debug("plot genes track") - fig2 = self.plot_genes( - region=region, - sizing_mode=sizing_mode, - width=width, - height=genes_height, - x_range=fig1.x_range, - show=False, - ) - - fig = bokeh.layouts.gridplot( - [fig1, fig2], - ncols=1, - toolbar_location="above", - merge_tools=True, - sizing_mode=sizing_mode, - ) - - if show: - bokeh.plotting.show(fig) - - return fig - - @doc( - summary="Open site annotations zarr.", - returns="Zarr hierarchy.", - ) - def open_site_annotations(self) -> zarr.hierarchy.Group: - if self._cache_site_annotations is None: - path = f"{self._base_path}/{self._site_annotations_zarr_path}" - store = init_zarr_store(fs=self._fs, path=path) - self._cache_site_annotations = zarr.open_consolidated(store=store) - return self._cache_site_annotations - - @doc( - summary=""" - Plot SNPs in a given genome region. SNPs are shown as rectangles, - with segregating and non-segregating SNPs positioned on different levels, - and coloured by site filter. 
- """, - parameters=dict( - max_snps="Maximum number of SNPs to show.", - ), - ) - def plot_snps_track( - self, - region: base_params.region, - sample_sets: Optional[base_params.sample_sets] = None, - sample_query: Optional[base_params.sample_query] = None, - site_mask: base_params.site_mask = DEFAULT, - cohort_size: Optional[base_params.cohort_size] = None, - sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, - width: gplt_params.width = gplt_params.width_default, - height: gplt_params.height = 120, - max_snps: int = 200_000, - x_range: Optional[gplt_params.x_range] = None, - show: gplt_params.show = True, - ) -> gplt_params.figure: - debug = self._log.debug - - site_mask = self._prep_site_mask_param(site_mask=site_mask) - - debug("resolve and check region") - resolved_region = self.resolve_region(region) - del region - - if ( - (resolved_region.start is None) - or (resolved_region.end is None) - or ((resolved_region.end - resolved_region.start) > max_snps) - ): - raise ValueError("Region is too large, please provide a smaller region.") - - debug("compute allele counts") - ac = allel.AlleleCountsArray( - self.snp_allele_counts( - region=resolved_region, - sample_sets=sample_sets, - sample_query=sample_query, - site_mask=None, - cohort_size=cohort_size, - ) - ) - an = ac.sum(axis=1) - is_seg = ac.is_segregating() - is_var = ac.is_variant() - allelism = ac.allelism() - - debug("obtain SNP variants data") - ds_sites = self.snp_variants( - region=resolved_region, - ).compute() - - debug("build a dataframe") - pos = ds_sites["variant_position"].values - alleles = ds_sites["variant_allele"].values.astype("U") - cols = { - "pos": pos, - "allele_0": alleles[:, 0], - "allele_1": alleles[:, 1], - "allele_2": alleles[:, 2], - "allele_3": alleles[:, 3], - "ac_0": ac[:, 0], - "ac_1": ac[:, 1], - "ac_2": ac[:, 2], - "ac_3": ac[:, 3], - "an": an, - "is_seg": is_seg, - "is_var": is_var, - "allelism": allelism, - } - - for site_mask_id in self.site_mask_ids: - cols[f"pass_{site_mask_id}"] = ds_sites[ - f"variant_filter_pass_{site_mask_id}" - ].values - - data = pd.DataFrame(cols) - - debug("create figure") - xwheel_zoom = bokeh.models.WheelZoomTool( - dimensions="width", maintain_focus=False - ) - pos = data["pos"].values - x_min = pos[0] - x_max = pos[-1] - if x_range is None: - x_range = bokeh.models.Range1d(x_min, x_max, bounds="auto") - - tooltips = [ - ("Position", "$x{0,0}"), - ( - "Alleles", - "@allele_0 (@ac_0), @allele_1 (@ac_1), @allele_2 (@ac_2), @allele_3 (@ac_3)", - ), - ("No. 
alleles", "@allelism"), - ("Allele calls", "@an"), - ] - - for site_mask_id in self.site_mask_ids: - tooltips.append((f"Pass {site_mask_id}", f"@pass_{site_mask_id}")) - - fig = bokeh.plotting.figure( - title="SNPs", - tools=["xpan", "xzoom_in", "xzoom_out", xwheel_zoom, "reset"], - active_scroll=xwheel_zoom, - active_drag="xpan", - sizing_mode=sizing_mode, - width=width, - height=height, - toolbar_location="above", - x_range=x_range, - y_range=(0.5, 2.5), - tooltips=tooltips, - ) - hover_tool = fig.select(type=bokeh.models.HoverTool) - hover_tool.names = ["snps"] - - debug("plot gaps in the reference genome") - seq = self.genome_sequence(region=resolved_region.contig).compute() - is_n = (seq == b"N") | (seq == b"n") - loc_n_start = ~is_n[:-1] & is_n[1:] - loc_n_stop = is_n[:-1] & ~is_n[1:] - n_starts = np.nonzero(loc_n_start)[0] - n_stops = np.nonzero(loc_n_stop)[0] - df_n_runs = pd.DataFrame( - {"left": n_starts + 1.6, "right": n_stops + 1.4, "top": 2.5, "bottom": 0.5} - ) - fig.quad( - top="top", - bottom="bottom", - left="left", - right="right", - color="#cccccc", - source=df_n_runs, - name="gaps", - ) - - debug("plot SNPs") - color_pass = bokeh.palettes.Colorblind6[3] - color_fail = bokeh.palettes.Colorblind6[5] - data["left"] = data["pos"] - 0.4 - data["right"] = data["pos"] + 0.4 - data["bottom"] = np.where(data["is_seg"], 1.6, 0.6) - data["top"] = data["bottom"] + 0.8 - data["color"] = np.where(data[f"pass_{site_mask}"], color_pass, color_fail) - fig.quad( - top="top", - bottom="bottom", - left="left", - right="right", - color="color", - source=data, - name="snps", - ) - - debug("tidy plot") - fig.yaxis.ticker = bokeh.models.FixedTicker( - ticks=[1, 2], - ) - fig.yaxis.major_label_overrides = { - 1: "Non-segregating", - 2: "Segregating", - } - fig.xaxis.axis_label = f"Contig {resolved_region.contig} position (bp)" - fig.xaxis.ticker = bokeh.models.AdaptiveTicker(min_interval=1) - fig.xaxis.minor_tick_line_color = None - fig.xaxis[0].formatter = bokeh.models.NumeralTickFormatter(format="0,0") - - if show: - bokeh.plotting.show(fig) - - return fig - + @check_types @doc( summary=""" Compute SNP allele frequencies for a gene transcript. @@ -3602,6 +2344,7 @@ def snp_allele_frequencies( return df_snps + @check_types @doc( summary=""" Compute amino acid substitution frequencies for a gene transcript. @@ -3691,6 +2434,7 @@ def aa_allele_frequencies( return df_aaf + @check_types @doc( summary=""" Group samples by taxon, area (space) and period (time), then compute @@ -3953,6 +2697,7 @@ def _block_jackknife_cohort_diversity_stats( tajima_d_ci_upp=tajima_d_ci_upp, ) + @check_types @doc( summary=""" Compute genetic diversity summary statistics for a cohort of @@ -4052,6 +2797,7 @@ def cohort_diversity_stats( return pd.Series(stats) + @check_types @doc( summary=""" Compute genetic diversity summary statistics for multiple cohorts. 
@@ -4145,6 +2891,7 @@ def diversity_stats( return df_stats + @check_types @doc( summary=""" Run a Fst genome-wide scan to investigate genetic differentiation @@ -4187,7 +2934,7 @@ def fst_gwss( cohort1_query=cohort1_query, cohort2_query=cohort2_query, sample_sets=self._prep_sample_sets_param(sample_sets=sample_sets), - site_mask=self._prep_site_mask_param(site_mask=site_mask), + site_mask=self._prep_optional_site_mask_param(site_mask=site_mask), cohort_size=cohort_size, min_cohort_size=min_cohort_size, max_cohort_size=max_cohort_size, @@ -4258,6 +3005,7 @@ def _fst_gwss( return results + @check_types @doc( summary=""" Plot a heatmap from a pandas DataFrame of frequencies, e.g., output @@ -4396,6 +3144,7 @@ def plot_frequencies_heatmap( return fig + @check_types @doc( summary="Create a time series plot of variant frequencies using plotly.", parameters=dict( @@ -4511,6 +3260,7 @@ def plot_frequencies_time_series( return fig + @check_types @doc( summary=""" Plot markers on a map showing variant frequencies for cohorts grouped @@ -4604,6 +3354,7 @@ def plot_frequencies_map_markers( ) m.add_layer(marker) + @check_types @doc( summary=""" Create an interactive map with markers showing variant frequencies or @@ -4673,6 +3424,7 @@ def plot_frequencies_interactive_map( return out + @check_types @doc( summary=""" Plot sample coordinates from a principal components analysis (PCA) @@ -4760,6 +3512,7 @@ def plot_pca_coords( return fig + @check_types @doc( summary=""" Plot sample coordinates from a principal components analysis (PCA) @@ -4847,6 +3600,7 @@ def plot_pca_coords_3d( return fig + @check_types @doc( summary="Plot diversity summary statistics for multiple cohorts.", parameters=dict( @@ -4963,6 +3717,7 @@ def plot_diversity_stats( ) fig.show() + @check_types @doc( summary=""" Run and plot a Fst genome-wide scan to investigate genetic @@ -5051,6 +3806,7 @@ def plot_fst_gwss_track( return fig + @check_types @doc( summary=""" Run and plot a Fst genome-wide scan to investigate genetic @@ -5121,6 +3877,7 @@ def plot_fst_gwss( bokeh.plotting.show(fig) + @check_types @doc( summary="Open haplotypes zarr.", returns="Zarr hierarchy.", @@ -5129,7 +3886,7 @@ def open_haplotypes( self, sample_set: base_params.sample_set, analysis: hap_params.analysis = DEFAULT, - ) -> zarr.hierarchy.Group: + ) -> Optional[zarr.hierarchy.Group]: analysis = self._prep_phasing_analysis_param(analysis=analysis) try: return self._cache_haplotypes[(sample_set, analysis)] @@ -5138,7 +3895,7 @@ def open_haplotypes( release_path = self._release_to_path(release) path = f"{self._base_path}/{release_path}/snp_haplotypes/{sample_set}/{analysis}/zarr" store = init_zarr_store(fs=self._fs, path=path) - # some sample sets have no data for a given analysis, handle this + # Some sample sets have no data for a given analysis, handle this. 
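+                # Opening the zarr store raises FileNotFoundError when no
+                # haplotype data exist for this sample set and analysis; in
+                # that case None is cached and returned, matching the
+                # Optional return type introduced above.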
try: root = zarr.open_consolidated(store=store) except FileNotFoundError: @@ -5146,6 +3903,7 @@ def open_haplotypes( self._cache_haplotypes[(sample_set, analysis)] = root return root + @check_types @doc( summary="Open haplotype sites zarr.", returns="Zarr hierarchy.", @@ -5236,6 +3994,7 @@ def _haplotypes_for_contig( return ds + @check_types @doc( summary="Access haplotype data.", returns="A dataset of haplotypes and associated data.", @@ -5257,16 +4016,13 @@ def haplotypes( debug("normalise parameters") sample_sets = self._prep_sample_sets_param(sample_sets=sample_sets) - resolved_region = self.resolve_region(region) + regions: List[Region] = parse_multi_region(self, region) del region - - if isinstance(resolved_region, Region): - resolved_region = [resolved_region] analysis = self._prep_phasing_analysis_param(analysis=analysis) debug("build dataset") lx = [] - for r in resolved_region: + for r in regions: ly = [] for s in sample_sets: @@ -5343,6 +4099,7 @@ def haplotypes( return ds + @check_types @doc( summary="Generate h12 GWSS calibration data for different window sizes.", returns=""" @@ -5365,7 +4122,7 @@ def h12_calibration( ] = h12_params.max_cohort_size_default, window_sizes: h12_params.window_sizes = h12_params.window_sizes_default, random_seed: base_params.random_seed = 42, - ) -> List[np.ndarray]: + ) -> Mapping[str, np.ndarray]: # change this name if you ever change the behaviour of this function, to # invalidate any previously cached data name = self._h12_calibration_cache_name @@ -5405,7 +4162,7 @@ def _h12_calibration( max_cohort_size, window_sizes, random_seed, - ): + ) -> Mapping[str, np.ndarray]: # access haplotypes ds_haps = self.haplotypes( region=contig, @@ -5422,13 +4179,14 @@ def _h12_calibration( with self._dask_progress(desc="Load haplotypes"): ht = gt.to_haplotypes().compute() - calibration_runs = dict() + calibration_runs: Dict[str, np.ndarray] = dict() for window_size in self._progress(window_sizes, desc="Compute H12"): h1, h12, h123, h2_h1 = allel.moving_garud_h(ht, size=window_size) calibration_runs[str(window_size)] = h12 return calibration_runs + @check_types @doc( summary="Plot h12 GWSS calibration data for different window sizes.", parameters=dict( @@ -5512,6 +4270,7 @@ def plot_h12_calibration( bokeh.plotting.show(fig) return fig + @check_types @doc( summary="Run h12 genome-wide selection scan.", returns=dict( @@ -5602,6 +4361,7 @@ def _h12_gwss( return results + @check_types @doc( summary="Plot h12 GWSS data.", ) @@ -5685,6 +4445,7 @@ def plot_h12_gwss_track( return fig + @check_types @doc( summary="Plot h12 GWSS data.", ) @@ -5750,6 +4511,7 @@ def plot_h12_gwss( bokeh.plotting.show(fig) + @check_types @doc( summary=""" Run a H1X genome-wide scan to detect genome regions with @@ -5863,6 +4625,7 @@ def _h1x_gwss( return results + @check_types @doc( summary=""" Run and plot a H1X genome-wide scan to detect genome regions @@ -5951,6 +4714,7 @@ def plot_h1x_gwss_track( return fig + @check_types @doc( summary=""" Run and plot a H1X genome-wide scan to detect genome regions @@ -6021,6 +4785,7 @@ def plot_h1x_gwss( bokeh.plotting.show(fig) + @check_types @doc( summary="Run iHS GWSS.", returns=dict( @@ -6186,6 +4951,7 @@ def _ihs_gwss( return results + @check_types @doc( summary="Run and plot iHS GWSS data.", ) @@ -6306,6 +5072,7 @@ def plot_ihs_gwss_track( return fig + @check_types @doc( summary="Run and plot iHS GWSS data.", ) @@ -6422,6 +5189,7 @@ def _garud_g123(self, gt): return g123 + @check_types @doc( summary="Run a G123 genome-wide selection scan.", 
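        # N.B., G123 is the multi-locus genotype analogue of the haplotype
        # homozygosity statistic H123, computed in moving windows over
        # unphased genotypes (see _garud_g123 above).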
returns=dict( @@ -6515,6 +5283,7 @@ def _g123_gwss( return results + @check_types @doc( summary="Plot G123 GWSS data.", ) @@ -6598,6 +5367,7 @@ def plot_g123_gwss_track( return fig + @check_types @doc( summary="Plot G123 GWSS data.", ) @@ -6716,6 +5486,7 @@ def _load_data_for_g123( return gt, pos + @check_types @doc( summary="Generate g123 GWSS calibration data for different window sizes.", returns=""" @@ -6738,7 +5509,7 @@ def g123_calibration( ] = g123_params.max_cohort_size_default, window_sizes: g123_params.window_sizes = g123_params.window_sizes_default, random_seed: base_params.random_seed = 42, - ) -> List[np.ndarray]: + ) -> Mapping[str, np.ndarray]: # change this name if you ever change the behaviour of this function, to # invalidate any previously cached data name = self._g123_calibration_cache_name @@ -6746,7 +5517,7 @@ def g123_calibration( params = dict( contig=contig, sites=sites, - site_mask=self._prep_site_mask_param(site_mask=site_mask), + site_mask=self._prep_optional_site_mask_param(site_mask=site_mask), window_sizes=window_sizes, sample_sets=self._prep_sample_sets_param(sample_sets=sample_sets), # N.B., do not be tempted to convert this sample query into integer @@ -6779,7 +5550,7 @@ def _g123_calibration( max_cohort_size, window_sizes, random_seed, - ): + ) -> Mapping[str, np.ndarray]: gt, _ = self._load_data_for_g123( contig=contig, sites=sites, @@ -6791,7 +5562,7 @@ def _g123_calibration( random_seed=random_seed, ) - calibration_runs = dict() + calibration_runs: Dict[str, np.ndarray] = dict() for window_size in self._progress(window_sizes, desc="Compute g123"): g123 = allel.moving_statistic( gt, statistic=self._garud_g123, size=window_size @@ -6800,6 +5571,7 @@ def _g123_calibration( return calibration_runs + @check_types @doc( summary="Plot g123 GWSS calibration data for different window sizes.", ) @@ -6876,6 +5648,7 @@ def plot_g123_calibration( fig.title = title bokeh.plotting.show(fig) + @check_types @doc( summary=""" Hierarchically cluster haplotypes in region and produce an interactive plot. @@ -7038,6 +5811,7 @@ def plot_haplotype_clustering( return fig + @check_types @doc( summary=""" Construct a median-joining haplotype network and display it using diff --git a/malariagen_data/util.py b/malariagen_data/util.py index cec144ca8..c6deaa706 100644 --- a/malariagen_data/util.py +++ b/malariagen_data/util.py @@ -5,6 +5,8 @@ import sys import warnings from enum import Enum +from functools import wraps +from inspect import getcallargs from textwrap import dedent, fill from typing import IO, Dict, Hashable, List, Mapping, Optional, Tuple, Union from urllib.parse import unquote_plus @@ -17,13 +19,18 @@ import allel import dask.array as da import ipinfo +import numba import numpy as np import pandas import pandas as pd import plotly.express as px +import typeguard import xarray as xr +import zarr from fsspec.core import url_to_fs from fsspec.mapping import FSMap +from numpydoc_decorator.impl import format_type +from typing_extensions import TypeAlias, get_type_hints DIM_VARIANT = "variants" DIM_ALLELE = "alleles" @@ -160,7 +167,9 @@ class SiteClass(Enum): INTRON_LAST = 10 -def da_from_zarr(z, inline_array, chunks="auto"): +def da_from_zarr( + z: zarr.core.Array, inline_array: bool, chunks: Union[str, Tuple[int, ...]] = "auto" +) -> da.Array: """Utility function for turning a zarr array into a dask array. 
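+
+    The inline_array argument controls whether the zarr array is embedded
+    directly into the dask task graph; chunks may be "auto" (the default
+    here) or an explicit chunk shape.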
N.B., dask does have its own from_zarr() function, but we roll
@@ -239,13 +248,14 @@ def _dask_compress_dataarray(a, indexer, dim):
     return v
 
 
-def da_compress(indexer, data, axis):
+def da_compress(
+    indexer: da.Array,
+    data: da.Array,
+    axis: int,
+) -> da.Array:
     """Wrapper for dask.array.compress() which computes chunk sizes faster."""
 
     # sanity checks
-    assert isinstance(data, da.Array)
-    assert isinstance(indexer, da.Array)
-    assert isinstance(axis, int)
     assert indexer.shape[0] == data.shape[axis]
 
     # useful variables
@@ -408,28 +418,29 @@ def _valid_contigs(resource):
     return valid_contigs
 
 
-def resolve_region(resource, region):
-    """Parse the provided region and return `Region(contig, start, end)`.
-    Supports contig names, gene names and genomic coordinates"""
+single_region_param_type: TypeAlias = Union[str, Region, Mapping]
+
+region_param_type: TypeAlias = Union[
+    single_region_param_type,
+    List[single_region_param_type],
+    Tuple[single_region_param_type, ...],
+]
+
+
+def parse_single_region(resource, region: single_region_param_type) -> Region:
     if isinstance(region, Region):
-        # the region is already a Region, nothing to do
+        # The region is already a Region, nothing to do.
         return region
 
-    if isinstance(region, dict):
+    if isinstance(region, Mapping):
+        # The region is in dictionary form, convert to a Region instance.
        return Region(
            contig=region.get("contig"),
            start=region.get("start"),
            end=region.get("end"),
        )
 
-    if isinstance(region, (list, tuple)):
-        # multiple regions, normalise to list and resolve components
-        return [resolve_region(resource, r) for r in region]
-
-    # check type, fail early if bad
-    if not isinstance(region, str):
-        raise TypeError("The region parameter must be a string or Region object.")
+    # Parameter types are checked upstream by the @check_types decorator,
+    # so by this point the region can only be a string.
+    assert isinstance(region, str)
 
    # check if region is a whole contig
    if region in _valid_contigs(resource):
@@ -450,7 +461,34 @@
     )
 
 
-def locate_region(region, pos):
+def parse_multi_region(
+    resource,
+    region: region_param_type,
+) -> List[Region]:
+    """Normalise the provided value to a list of Region objects."""
+    if isinstance(region, (list, tuple)):
+        return [parse_single_region(resource, r) for r in region]
+    else:
+        return [parse_single_region(resource, region)]
+
+
+def resolve_region(
+    resource,
+    region: region_param_type,
+) -> Union[Region, List[Region]]:
+    """Parse the provided region and return a `Region` object, or a list of
+    `Region` objects if multiple values are provided.
+
+    Supports contig names, gene names and genomic coordinates.
+
+    """
+    if isinstance(region, (list, tuple)):
+        # Multiple regions, normalise to list and resolve components.
+        return [parse_single_region(resource, r) for r in region]
+    else:
+        return parse_single_region(resource, region)
+
+
+def locate_region(region: Region, pos: np.ndarray) -> slice:
     """Get a slice into a sorted array of positions for the given genomic
    region.
 
     Parameters
     ----------
@@ -465,11 +503,37 @@
     loc_region : slice
 
     """
-    pos = allel.SortedIndex(pos)
-    loc_region = pos.locate_range(region.start, region.end)
+    pos_idx = allel.SortedIndex(pos)
+    try:
+        loc_region = pos_idx.locate_range(region.start, region.end)
+    except KeyError:
+        # There are no data within the requested region; return a zero-length slice.
+        loc_region = slice(0, 0)
    return loc_region
 
 
+def region_str(region: Union[Region, List[Region]]) -> str:
+    """Convert a region or list of regions to a string representation.
+
+    Parameters
+    ----------
+    region : Region or list of Region
+        The region to display.
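+
+        A single Region is rendered via str(); a list of several regions
+        is rendered as the individual regions joined by "; ".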
+ + Returns + ------- + out : str + + """ + if isinstance(region, list): + if len(region) > 1: + return "; ".join([str(r) for r in region]) + else: + return str(region[0]) + else: + return str(region) + + def _simple_xarray_concat_arrays( datasets: List[xr.Dataset], names: List[Hashable], dim: str ) -> Mapping[Hashable, xr.DataArray]: @@ -573,16 +637,11 @@ def simple_xarray_concat( # ) -def type_error( - name, - value, - expectation, -): - message = ( - f"Bad type for parameter {name}; expected {expectation}, " - f"found {type(value)}" - ) - raise TypeError(message) +def da_concat(arrays: List[da.Array], **kwargs) -> da.Array: + if len(arrays) == 1: + return arrays[0] + else: + return da.concatenate(arrays, **kwargs) def value_error( @@ -596,15 +655,6 @@ def value_error( raise ValueError(message) -def check_type( - name, - value, - expectation, -): - if not isinstance(value, expectation): - type_error(name, value, expectation) - - def hash_params(params): """Helper function to hash function parameters.""" s = json.dumps(params, sort_keys=True, indent=4) @@ -839,3 +889,61 @@ def check_colab_location(*, gcs_url: str, url: str) -> Optional[ipinfo.details.D pass return details + + +def check_types(f): + """Simple decorator to provide runtime checking of parameter types. + + N.B., the typeguard package does have a decorator function called + @typechecked which performs a similar purpose. However, the typeguard + decorator causes a memory leak and doesn't seem usable. Also, the + typeguard decorator performs runtime checking of all variables within + the function as well as the arguments and return values. We only want + checking of the arguments to help users provide correct inputs. + + """ + + @wraps(f) + def wrapper(*args, **kwargs): + type_hints = get_type_hints(f) + call_args = getcallargs(f, *args, **kwargs) + for k, t in type_hints.items(): + if k in call_args: + v = call_args[k] + try: + typeguard.check_type(v, t) + except typeguard.TypeCheckError as e: + expected_type = format_type(t) + actual_type = format_type(type(v)) + message = fill( + dedent( + f""" + Parameter {k!r} with value {v!r} in call to function {f.__name__!r} has incorrect type: + found {actual_type}, expected {expected_type}. See below for further information. 
+ """ + ) + ) + message += f"\n\n{e}" + error = TypeError(message) + raise error from None + return f(*args, **kwargs) + + return wrapper + + +@numba.njit +def true_runs(a): + in_run = False + starts = [] + stops = [] + for i in range(a.shape[0]): + v = a[i] + if not in_run and v: + in_run = True + starts.append(i) + if in_run and not v: + in_run = False + stops.append(i) + if in_run: + stops.append(a.shape[0]) + return np.array(starts, dtype=np.int64), np.array(stops, dtype=np.int64) diff --git a/notebooks/plot_snps.ipynb b/notebooks/plot_snps.ipynb index c847d0430..27a972023 100644 --- a/notebooks/plot_snps.ipynb +++ b/notebooks/plot_snps.ipynb @@ -4,20 +4,21 @@ "cell_type": "code", "execution_count": null, "id": "14640993", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "import malariagen_data\n", - "import allel\n", - "import pandas as pd\n", - "import numpy as np" + "import malariagen_data" ] }, { "cell_type": "code", "execution_count": null, "id": "5d740cac", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "ag3 = malariagen_data.Ag3(\n", @@ -31,7 +32,9 @@ "cell_type": "code", "execution_count": null, "id": "bd576b75", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "ag3.snp_variants(region=\"3L\")" @@ -41,7 +44,9 @@ "cell_type": "code", "execution_count": null, "id": "7de3ba82", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "ag3.snp_calls(region=\"3L\", sample_sets=\"AG1000G-FR\")" @@ -51,47 +56,91 @@ "cell_type": "code", "execution_count": null, "id": "82096dc0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "ag3.plot_snps_track(region=\"2L:2,350,000-2,450,000\", sample_sets=\"AG1000G-FR\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10c340aa", "metadata": {}, "outputs": [], "source": [ - "ag3.plot_snps_track(region=\"2L:2,350,000-2,450,000\", sample_sets=\"AG1000G-FR\");" + "ag3.plot_snps_track(region=\"2L:2,363,000-2,364,000\", sample_sets=\"AG1000G-FR\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "0e3a8961", + "id": "e8e463dd", "metadata": {}, "outputs": [], "source": [ - "ag3.plot_snps_track(region=\"2L:2,350,000-2,450,000\", sample_sets=\"AG1000G-BF-A\", cohort_size=20);" + "ag3.plot_snps_track(region=\"2L:2,364,000-2,366,000\", sample_sets=\"AG1000G-FR\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "63aca10c", + "id": "7fb757a7", "metadata": {}, "outputs": [], "source": [ - "ag3.plot_snps(region=\"2L:2,350,000-2,450,000\", sample_sets=\"AG1000G-FR\");" + "ag3.plot_snps_track(region=\"2L:2,364,000-2,364,500\", sample_sets=\"AG1000G-FR\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e3a8961", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "ag3.plot_snps_track(\n", + " region=\"2L:2,350,000-2,450,000\", sample_sets=\"AG1000G-BF-A\", cohort_size=20\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63aca10c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "ag3.plot_snps(region=\"2L:2,350,000-2,450,000\", sample_sets=\"AG1000G-FR\")" ] }, { "cell_type": "code", "execution_count": null, "id": "6fb5216e", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "ag3.plot_snps(region=\"3R:28,550,000-28,650,000\", sample_sets=\"AG1000G-BF-A\", cohort_size=10);" + "ag3.plot_snps(\n", + " region=\"3R:28,550,000-28,650,000\", sample_sets=\"AG1000G-BF-A\", 
cohort_size=10\n", + ")" ] }, { "cell_type": "code", "execution_count": null, "id": "0d88ceb5", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "af1 = malariagen_data.Af1(\n", @@ -105,7 +154,9 @@ "cell_type": "code", "execution_count": null, "id": "0c98cabe", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "af1.snp_variants(region=\"3RL\")" @@ -115,7 +166,9 @@ "cell_type": "code", "execution_count": null, "id": "8a7c297d", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "af1.sample_sets()" @@ -125,7 +178,9 @@ "cell_type": "code", "execution_count": null, "id": "d0552711", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "af1.snp_calls(region=\"3RL\", sample_sets=\"1229-VO-GH-DADZIE-VMF00095\")" @@ -135,30 +190,44 @@ "cell_type": "code", "execution_count": null, "id": "d269e02e", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "af1.plot_snps_track(region=\"2RL:2,420,000-2,450,000\", sample_sets=\"1229-VO-GH-DADZIE-VMF00095\");" + "af1.plot_snps_track(\n", + " region=\"2RL:2,420,000-2,450,000\", sample_sets=\"1229-VO-GH-DADZIE-VMF00095\"\n", + ")" ] }, { "cell_type": "code", "execution_count": null, "id": "872c56ab", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "af1.plot_snps(region=\"2RL:2,420,000-2,450,000\", sample_sets=\"1229-VO-GH-DADZIE-VMF00095\");" + "af1.plot_snps(\n", + " region=\"2RL:2,420,000-2,450,000\", sample_sets=\"1229-VO-GH-DADZIE-VMF00095\"\n", + ")" ] }, { "cell_type": "code", "execution_count": null, "id": "c24a21ab", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "af1.plot_snps(region=\"2RL:2,420,000-2,450,000\", sample_sets=\"1229-VO-GH-DADZIE-VMF00095\", cohort_size=10);" + "af1.plot_snps(\n", + " region=\"2RL:2,420,000-2,450,000\",\n", + " sample_sets=\"1229-VO-GH-DADZIE-VMF00095\",\n", + " cohort_size=10,\n", + ")" ] }, { @@ -186,7 +255,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.16" + "version": "3.8.16" } }, "nbformat": 4, diff --git a/notebooks/repr.ipynb b/notebooks/repr.ipynb index 4f6aa6776..8b2beb2f5 100644 --- a/notebooks/repr.ipynb +++ b/notebooks/repr.ipynb @@ -2,9 +2,11 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "aa5bad7b", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import malariagen_data" @@ -12,805 +14,48 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "24cdd3a1", - "metadata": {}, - "outputs": [ - { - "data": { - "application/javascript": [ - "(function(root) {\n", - " function now() {\n", - " return new Date();\n", - " }\n", - "\n", - " const force = true;\n", - "\n", - " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", - " root._bokeh_onload_callbacks = [];\n", - " root._bokeh_is_loading = undefined;\n", - " }\n", - "\n", - "const JS_MIME_TYPE = 'application/javascript';\n", - " const HTML_MIME_TYPE = 'text/html';\n", - " const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", - " const CLASS_NAME = 'output_bokeh rendered_html';\n", - "\n", - " /**\n", - " * Render data to the DOM node\n", - " */\n", - " function render(props, node) {\n", - " const script = document.createElement(\"script\");\n", - " node.appendChild(script);\n", - " }\n", - "\n", - " /**\n", - " * Handle when an output is cleared or 
removed\n", - " */\n", - " function handleClearOutput(event, handle) {\n", - " const cell = handle.cell;\n", - "\n", - " const id = cell.output_area._bokeh_element_id;\n", - " const server_id = cell.output_area._bokeh_server_id;\n", - " // Clean up Bokeh references\n", - " if (id != null && id in Bokeh.index) {\n", - " Bokeh.index[id].model.document.clear();\n", - " delete Bokeh.index[id];\n", - " }\n", - "\n", - " if (server_id !== undefined) {\n", - " // Clean up Bokeh references\n", - " const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", - " cell.notebook.kernel.execute(cmd_clean, {\n", - " iopub: {\n", - " output: function(msg) {\n", - " const id = msg.content.text.trim();\n", - " if (id in Bokeh.index) {\n", - " Bokeh.index[id].model.document.clear();\n", - " delete Bokeh.index[id];\n", - " }\n", - " }\n", - " }\n", - " });\n", - " // Destroy server and session\n", - " const cmd_destroy = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", - " cell.notebook.kernel.execute(cmd_destroy);\n", - " }\n", - " }\n", - "\n", - " /**\n", - " * Handle when a new output is added\n", - " */\n", - " function handleAddOutput(event, handle) {\n", - " const output_area = handle.output_area;\n", - " const output = handle.output;\n", - "\n", - " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", - " if ((output.output_type != \"display_data\") || (!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n", - " return\n", - " }\n", - "\n", - " const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", - "\n", - " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", - " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", - " // store reference to embed id on output_area\n", - " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", - " }\n", - " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", - " const bk_div = document.createElement(\"div\");\n", - " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", - " const script_attrs = bk_div.children[0].attributes;\n", - " for (let i = 0; i < script_attrs.length; i++) {\n", - " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", - " toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n", - " }\n", - " // store reference to server id on output_area\n", - " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", - " }\n", - " }\n", - "\n", - " function register_renderer(events, OutputArea) {\n", - "\n", - " function append_mime(data, metadata, element) {\n", - " // create a DOM node to render to\n", - " const toinsert = this.create_output_subarea(\n", - " metadata,\n", - " CLASS_NAME,\n", - " EXEC_MIME_TYPE\n", - " );\n", - " this.keyboard_manager.register_events(toinsert);\n", - " // Render to node\n", - " const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", - " render(props, toinsert[toinsert.length - 1]);\n", - " element.append(toinsert);\n", - " return toinsert\n", - " }\n", - "\n", - " /* Handle when an output is cleared or removed */\n", - " events.on('clear_output.CodeCell', handleClearOutput);\n", - " events.on('delete.Cell', handleClearOutput);\n", - "\n", - " /* Handle when a new output is added */\n", - " 
events.on('output_added.OutputArea', handleAddOutput);\n", - "\n", - " /**\n", - " * Register the mime type and append_mime function with output_area\n", - " */\n", - " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", - " /* Is output safe? */\n", - " safe: true,\n", - " /* Index of renderer in `output_area.display_order` */\n", - " index: 0\n", - " });\n", - " }\n", - "\n", - " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", - " if (root.Jupyter !== undefined) {\n", - " const events = require('base/js/events');\n", - " const OutputArea = require('notebook/js/outputarea').OutputArea;\n", - "\n", - " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", - " register_renderer(events, OutputArea);\n", - " }\n", - " }\n", - " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", - " root._bokeh_timeout = Date.now() + 5000;\n", - " root._bokeh_failed_load = false;\n", - " }\n", - "\n", - " const NB_LOAD_WARNING = {'data': {'text/html':\n", - " \"
\\n\"+\n", - " \"

\\n\"+\n", - " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", - " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", - " \"

\\n\"+\n", - " \"\\n\"+\n", - " \"\\n\"+\n", - " \"from bokeh.resources import INLINE\\n\"+\n", - " \"output_notebook(resources=INLINE)\\n\"+\n", - " \"\\n\"+\n", - " \"
\"}};\n", - "\n", - " function display_loaded() {\n", - " const el = document.getElementById(null);\n", - " if (el != null) {\n", - " el.textContent = \"BokehJS is loading...\";\n", - " }\n", - " if (root.Bokeh !== undefined) {\n", - " if (el != null) {\n", - " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", - " }\n", - " } else if (Date.now() < root._bokeh_timeout) {\n", - " setTimeout(display_loaded, 100)\n", - " }\n", - " }\n", - "\n", - " function run_callbacks() {\n", - " try {\n", - " root._bokeh_onload_callbacks.forEach(function(callback) {\n", - " if (callback != null)\n", - " callback();\n", - " });\n", - " } finally {\n", - " delete root._bokeh_onload_callbacks\n", - " }\n", - " console.debug(\"Bokeh: all callbacks have finished\");\n", - " }\n", - "\n", - " function load_libs(css_urls, js_urls, callback) {\n", - " if (css_urls == null) css_urls = [];\n", - " if (js_urls == null) js_urls = [];\n", - "\n", - " root._bokeh_onload_callbacks.push(callback);\n", - " if (root._bokeh_is_loading > 0) {\n", - " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", - " return null;\n", - " }\n", - " if (js_urls == null || js_urls.length === 0) {\n", - " run_callbacks();\n", - " return null;\n", - " }\n", - " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", - " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", - "\n", - " function on_load() {\n", - " root._bokeh_is_loading--;\n", - " if (root._bokeh_is_loading === 0) {\n", - " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", - " run_callbacks()\n", - " }\n", - " }\n", - "\n", - " function on_error(url) {\n", - " console.error(\"failed to load \" + url);\n", - " }\n", - "\n", - " for (let i = 0; i < css_urls.length; i++) {\n", - " const url = css_urls[i];\n", - " const element = document.createElement(\"link\");\n", - " element.onload = on_load;\n", - " element.onerror = on_error.bind(null, url);\n", - " element.rel = \"stylesheet\";\n", - " element.type = \"text/css\";\n", - " element.href = url;\n", - " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", - " document.body.appendChild(element);\n", - " }\n", - "\n", - " for (let i = 0; i < js_urls.length; i++) {\n", - " const url = js_urls[i];\n", - " const element = document.createElement('script');\n", - " element.onload = on_load;\n", - " element.onerror = on_error.bind(null, url);\n", - " element.async = false;\n", - " element.src = url;\n", - " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", - " document.head.appendChild(element);\n", - " }\n", - " };\n", - "\n", - " function inject_raw_css(css) {\n", - " const element = document.createElement(\"style\");\n", - " element.appendChild(document.createTextNode(css));\n", - " document.body.appendChild(element);\n", - " }\n", - "\n", - " const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-2.4.3.min.js\"];\n", - " const css_urls = [];\n", - "\n", - " const inline_js = [ function(Bokeh) {\n", - " Bokeh.set_log_level(\"info\");\n", - " },\n", - "function(Bokeh) {\n", - " }\n", - " ];\n", - "\n", - " function run_inline_js() {\n", - " if (root.Bokeh !== undefined || force === 
true) {\n", - " for (let i = 0; i < inline_js.length; i++) {\n", - " inline_js[i].call(root, root.Bokeh);\n", - " }\n", - "} else if (Date.now() < root._bokeh_timeout) {\n", - " setTimeout(run_inline_js, 100);\n", - " } else if (!root._bokeh_failed_load) {\n", - " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", - " root._bokeh_failed_load = true;\n", - " } else if (force !== true) {\n", - " const cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", - " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", - " }\n", - " }\n", - "\n", - " if (root._bokeh_is_loading === 0) {\n", - " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", - " run_inline_js();\n", - " } else {\n", - " load_libs(css_urls, js_urls, function() {\n", - " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", - " run_inline_js();\n", - " });\n", - " }\n", - "}(window));" - ], - "application/vnd.bokehjs_load.v0+json": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n const el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-2.4.3.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS 
loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "metadata": { + "tags": [] + }, + "outputs": [], "source": [ "ag3 = malariagen_data.Ag3()" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "362dedbf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MalariaGEN Ag3 API client
\n", - " Please note that data are subject to terms of use,\n", - " for more information see \n", - " the MalariaGEN website or contact data@malariagen.net.\n", - " See also the Ag3 API docs.\n", - "
\n", - " Storage URL\n", - " gs://vo_agam_release/
\n", - " Data releases available\n", - " 3.0
\n", - " Results cache\n", - " None
\n", - " Cohorts analysis\n", - " 20220608
\n", - " Species analysis\n", - " aim_20220528
\n", - " Site filters analysis\n", - " dt_20200416
\n", - " Software version\n", - " malariagen_data 0.0.0
\n", - " Client location\n", - " England, GB
\n", - " " - ], - "text/plain": [ - "\n", - "Storage URL : gs://vo_agam_release/\n", - "Data releases available : 3.0\n", - "Results cache : None\n", - "Cohorts analysis : 20220608\n", - "Species analysis : aim_20220528\n", - "Site filters analysis : dt_20200416\n", - "Software version : malariagen_data 0.0.0\n", - "Client location : England, GB\n", - "---\n", - "Please note that data are subject to terms of use,\n", - "for more information see https://www.malariagen.net/data\n", - "or contact data@malariagen.net. For API documentation see \n", - "https://malariagen.github.io/vector-data/ag3/api.html" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "metadata": { + "tags": [] + }, + "outputs": [], "source": [ "ag3" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "294fc8b7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Storage URL : gs://vo_agam_release/\n", - "Data releases available : 3.0\n", - "Results cache : None\n", - "Cohorts analysis : 20220608\n", - "Species analysis : aim_20220528\n", - "Site filters analysis : dt_20200416\n", - "Software version : malariagen_data 0.0.0\n", - "Client location : England, GB\n", - "---\n", - "Please note that data are subject to terms of use,\n", - "for more information see https://www.malariagen.net/data\n", - "or contact data@malariagen.net. For API documentation see \n", - "https://malariagen.github.io/vector-data/ag3/api.html\n" - ] - } - ], + "metadata": { + "tags": [] + }, + "outputs": [], "source": [ "print(repr(ag3))" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "9ca832ac", - "metadata": {}, - "outputs": [ - { - "data": { - "application/javascript": [ - "(function(root) {\n", - " function now() {\n", - " return new Date();\n", - " }\n", - "\n", - " const force = true;\n", - "\n", - " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", - " root._bokeh_onload_callbacks = [];\n", - " root._bokeh_is_loading = undefined;\n", - " }\n", - "\n", - "const JS_MIME_TYPE = 'application/javascript';\n", - " const HTML_MIME_TYPE = 'text/html';\n", - " const EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", - " const CLASS_NAME = 'output_bokeh rendered_html';\n", - "\n", - " /**\n", - " * Render data to the DOM node\n", - " */\n", - " function render(props, node) {\n", - " const script = document.createElement(\"script\");\n", - " node.appendChild(script);\n", - " }\n", - "\n", - " /**\n", - " * Handle when an output is cleared or removed\n", - " */\n", - " function handleClearOutput(event, handle) {\n", - " const cell = handle.cell;\n", - "\n", - " const id = cell.output_area._bokeh_element_id;\n", - " const server_id = cell.output_area._bokeh_server_id;\n", - " // Clean up Bokeh references\n", - " if (id != null && id in Bokeh.index) {\n", - " Bokeh.index[id].model.document.clear();\n", - " delete Bokeh.index[id];\n", - " }\n", - "\n", - " if (server_id !== undefined) {\n", - " // Clean up Bokeh references\n", - " const cmd_clean = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", - " cell.notebook.kernel.execute(cmd_clean, {\n", - " iopub: {\n", - " output: function(msg) {\n", - " const id = msg.content.text.trim();\n", - " if (id in Bokeh.index) {\n", - " Bokeh.index[id].model.document.clear();\n", - " delete Bokeh.index[id];\n", - " 
}\n", - " }\n", - " }\n", - " });\n", - " // Destroy server and session\n", - " const cmd_destroy = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", - " cell.notebook.kernel.execute(cmd_destroy);\n", - " }\n", - " }\n", - "\n", - " /**\n", - " * Handle when a new output is added\n", - " */\n", - " function handleAddOutput(event, handle) {\n", - " const output_area = handle.output_area;\n", - " const output = handle.output;\n", - "\n", - " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", - " if ((output.output_type != \"display_data\") || (!Object.prototype.hasOwnProperty.call(output.data, EXEC_MIME_TYPE))) {\n", - " return\n", - " }\n", - "\n", - " const toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", - "\n", - " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", - " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", - " // store reference to embed id on output_area\n", - " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", - " }\n", - " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", - " const bk_div = document.createElement(\"div\");\n", - " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", - " const script_attrs = bk_div.children[0].attributes;\n", - " for (let i = 0; i < script_attrs.length; i++) {\n", - " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", - " toinsert[toinsert.length - 1].firstChild.textContent = bk_div.children[0].textContent\n", - " }\n", - " // store reference to server id on output_area\n", - " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", - " }\n", - " }\n", - "\n", - " function register_renderer(events, OutputArea) {\n", - "\n", - " function append_mime(data, metadata, element) {\n", - " // create a DOM node to render to\n", - " const toinsert = this.create_output_subarea(\n", - " metadata,\n", - " CLASS_NAME,\n", - " EXEC_MIME_TYPE\n", - " );\n", - " this.keyboard_manager.register_events(toinsert);\n", - " // Render to node\n", - " const props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", - " render(props, toinsert[toinsert.length - 1]);\n", - " element.append(toinsert);\n", - " return toinsert\n", - " }\n", - "\n", - " /* Handle when an output is cleared or removed */\n", - " events.on('clear_output.CodeCell', handleClearOutput);\n", - " events.on('delete.Cell', handleClearOutput);\n", - "\n", - " /* Handle when a new output is added */\n", - " events.on('output_added.OutputArea', handleAddOutput);\n", - "\n", - " /**\n", - " * Register the mime type and append_mime function with output_area\n", - " */\n", - " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", - " /* Is output safe? 
*/\n", - " safe: true,\n", - " /* Index of renderer in `output_area.display_order` */\n", - " index: 0\n", - " });\n", - " }\n", - "\n", - " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", - " if (root.Jupyter !== undefined) {\n", - " const events = require('base/js/events');\n", - " const OutputArea = require('notebook/js/outputarea').OutputArea;\n", - "\n", - " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", - " register_renderer(events, OutputArea);\n", - " }\n", - " }\n", - " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", - " root._bokeh_timeout = Date.now() + 5000;\n", - " root._bokeh_failed_load = false;\n", - " }\n", - "\n", - " const NB_LOAD_WARNING = {'data': {'text/html':\n", - " \"
\\n\"+\n", - " \"

\\n\"+\n", - " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", - " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", - " \"

\\n\"+\n", - " \"
    \\n\"+\n", - " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", - " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", - " \"
\\n\"+\n", - " \"\\n\"+\n", - " \"from bokeh.resources import INLINE\\n\"+\n", - " \"output_notebook(resources=INLINE)\\n\"+\n", - " \"\\n\"+\n", - " \"
\"}};\n", - "\n", - " function display_loaded() {\n", - " const el = document.getElementById(null);\n", - " if (el != null) {\n", - " el.textContent = \"BokehJS is loading...\";\n", - " }\n", - " if (root.Bokeh !== undefined) {\n", - " if (el != null) {\n", - " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", - " }\n", - " } else if (Date.now() < root._bokeh_timeout) {\n", - " setTimeout(display_loaded, 100)\n", - " }\n", - " }\n", - "\n", - " function run_callbacks() {\n", - " try {\n", - " root._bokeh_onload_callbacks.forEach(function(callback) {\n", - " if (callback != null)\n", - " callback();\n", - " });\n", - " } finally {\n", - " delete root._bokeh_onload_callbacks\n", - " }\n", - " console.debug(\"Bokeh: all callbacks have finished\");\n", - " }\n", - "\n", - " function load_libs(css_urls, js_urls, callback) {\n", - " if (css_urls == null) css_urls = [];\n", - " if (js_urls == null) js_urls = [];\n", - "\n", - " root._bokeh_onload_callbacks.push(callback);\n", - " if (root._bokeh_is_loading > 0) {\n", - " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", - " return null;\n", - " }\n", - " if (js_urls == null || js_urls.length === 0) {\n", - " run_callbacks();\n", - " return null;\n", - " }\n", - " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", - " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", - "\n", - " function on_load() {\n", - " root._bokeh_is_loading--;\n", - " if (root._bokeh_is_loading === 0) {\n", - " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", - " run_callbacks()\n", - " }\n", - " }\n", - "\n", - " function on_error(url) {\n", - " console.error(\"failed to load \" + url);\n", - " }\n", - "\n", - " for (let i = 0; i < css_urls.length; i++) {\n", - " const url = css_urls[i];\n", - " const element = document.createElement(\"link\");\n", - " element.onload = on_load;\n", - " element.onerror = on_error.bind(null, url);\n", - " element.rel = \"stylesheet\";\n", - " element.type = \"text/css\";\n", - " element.href = url;\n", - " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", - " document.body.appendChild(element);\n", - " }\n", - "\n", - " for (let i = 0; i < js_urls.length; i++) {\n", - " const url = js_urls[i];\n", - " const element = document.createElement('script');\n", - " element.onload = on_load;\n", - " element.onerror = on_error.bind(null, url);\n", - " element.async = false;\n", - " element.src = url;\n", - " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", - " document.head.appendChild(element);\n", - " }\n", - " };\n", - "\n", - " function inject_raw_css(css) {\n", - " const element = document.createElement(\"style\");\n", - " element.appendChild(document.createTextNode(css));\n", - " document.body.appendChild(element);\n", - " }\n", - "\n", - " const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-2.4.3.min.js\"];\n", - " const css_urls = [];\n", - "\n", - " const inline_js = [ function(Bokeh) {\n", - " Bokeh.set_log_level(\"info\");\n", - " },\n", - "function(Bokeh) {\n", - " }\n", - " ];\n", - "\n", - " function run_inline_js() {\n", - " if (root.Bokeh !== undefined || force === 
true) {\n", - " for (let i = 0; i < inline_js.length; i++) {\n", - " inline_js[i].call(root, root.Bokeh);\n", - " }\n", - "} else if (Date.now() < root._bokeh_timeout) {\n", - " setTimeout(run_inline_js, 100);\n", - " } else if (!root._bokeh_failed_load) {\n", - " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", - " root._bokeh_failed_load = true;\n", - " } else if (force !== true) {\n", - " const cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", - " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", - " }\n", - " }\n", - "\n", - " if (root._bokeh_is_loading === 0) {\n", - " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", - " run_inline_js();\n", - " } else {\n", - " load_libs(css_urls, js_urls, function() {\n", - " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", - " run_inline_js();\n", - " });\n", - " }\n", - "}(window));" - ], - "application/vnd.bokehjs_load.v0+json": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n const NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n const el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error(url) {\n console.error(\"failed to load \" + url);\n }\n\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error.bind(null, url);\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-2.4.3.min.js\"];\n const css_urls = [];\n\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {\n }\n ];\n\n function run_inline_js() {\n if (root.Bokeh !== undefined || force === true) {\n for (let i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n const cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS 
loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MalariaGEN Af1 API client
\n", - " Please note that data are subject to terms of use,\n", - " for more information see \n", - " the MalariaGEN website or contact data@malariagen.net.\n", - "
\n", - " Storage URL\n", - " gs://vo_afun_release/
\n", - " Data releases available\n", - " 1.0
\n", - " Results cache\n", - " None
\n", - " Cohorts analysis\n", - " 20221129
\n", - " Site filters analysis\n", - " dt_20200416
\n", - " Software version\n", - " malariagen_data 0.0.0
\n", - " Client location\n", - " England, GB
\n", - " " - ], - "text/plain": [ - "\n", - "Storage URL : gs://vo_afun_release/\n", - "Data releases available : 1.0\n", - "Results cache : None\n", - "Cohorts analysis : 20221129\n", - "Site filters analysis : dt_20200416\n", - "Software version : malariagen_data 0.0.0\n", - "Client location : England, GB\n", - "---\n", - "Please note that data are subject to terms of use,\n", - "for more information see https://www.malariagen.net/data\n", - "or contact data@malariagen.net." - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "metadata": { + "tags": [] + }, + "outputs": [], "source": [ "af1 = malariagen_data.Af1()\n", "af1" @@ -818,29 +63,12 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "73e79af6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Storage URL : gs://vo_afun_release/\n", - "Data releases available : 1.0\n", - "Results cache : None\n", - "Cohorts analysis : 20221129\n", - "Site filters analysis : dt_20200416\n", - "Software version : malariagen_data 0.0.0\n", - "Client location : England, GB\n", - "---\n", - "Please note that data are subject to terms of use,\n", - "for more information see https://www.malariagen.net/data\n", - "or contact data@malariagen.net.\n" - ] - } - ], + "metadata": { + "tags": [] + }, + "outputs": [], "source": [ "print(repr(af1))" ] @@ -870,7 +98,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.16" + "version": "3.8.16" } }, "nbformat": 4, diff --git a/notebooks/spike_sim_test_data.ipynb b/notebooks/spike_sim_test_data.ipynb new file mode 100644 index 000000000..6960743b6 --- /dev/null +++ b/notebooks/spike_sim_test_data.ipynb @@ -0,0 +1,993 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Explore real data to help with simulating test data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import malariagen_data\n", + "import allel\n", + "import numpy as np\n", + "import plotly.express as px\n", + "import plotly.io as pio\n", + "\n", + "pio.templates.default = \"plotly_dark\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ag3 = malariagen_data.Ag3(\n", + " \"simplecache::gs://vo_agam_release\",\n", + " simplecache=dict(cache_storage=\"../gcs_cache\"),\n", + ")\n", + "ag3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "af1 = malariagen_data.Af1(\n", + " \"simplecache::gs://vo_afun_release\",\n", + " simplecache=dict(cache_storage=\"../gcs_cache\"),\n", + ")\n", + "af1" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Site annotations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ann = ag3.open_site_annotations()\n", + "ann" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list(ann)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(ann.tree())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "codon_degeneracy = ann[\"codon_degeneracy\"][\"3L\"][10_000_000:11_000_000]\n", + 
"px.histogram(codon_degeneracy)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.bincount(codon_degeneracy + 1) / codon_degeneracy.size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "codon_nonsyn = ann[\"codon_nonsyn\"][\"3L\"][10_000_000:11_000_000]\n", + "px.histogram(codon_nonsyn)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.bincount(codon_nonsyn) / codon_nonsyn.size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "codon_position = ann[\"codon_position\"][\"3L\"][10_000_000:11_000_000]\n", + "px.histogram(codon_position)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.bincount(codon_position + 1) / codon_position.size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "seq_cls = ann[\"seq_cls\"][\"3L\"][10_000_000:11_000_000]\n", + "px.histogram(seq_cls)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.bincount(seq_cls) / seq_cls.size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "seq_flen = ann[\"seq_flen\"][\"3L\"][10_000_000:11_000_000]\n", + "px.histogram(seq_flen, nbins=20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "seq_relpos_start = ann[\"seq_relpos_start\"][\"3L\"][10_000_000:11_000_000]\n", + "px.histogram(seq_relpos_start, nbins=20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "seq_relpos_stop = ann[\"seq_relpos_stop\"][\"3L\"][10_000_000:11_000_000]\n", + "px.histogram(seq_relpos_stop, nbins=20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x = np.random.beta(a=0.4, b=4, size=100_000) * 40_000\n", + "px.histogram(x, nbins=20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list(ann)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SNP calls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = ag3.snp_calls(region=\"3L\", sample_sets=\"AG1000G-BF-A\")\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds2 = ds.isel(variants=slice(10_500_000, 10_600_000))\n", + "ds2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gt = ds2[\"call_genotype\"].values\n", + "gt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gt2 = allel.GenotypeArray(gt)\n", + "gt2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.bincount(gt.flatten() + 1) / gt.size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "missing_fraction = gt2.count_missing() / (gt2.n_variants * gt2.n_samples)\n", + "missing_fraction" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allele_counts = np.bincount(gt.flatten() + 1)[1:]\n", + "allele_counts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allele_fractions = allele_counts / np.sum(allele_counts)\n", + "allele_fractions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gt_sim = np.random.choice(4, size=gt.shape, replace=True, p=allele_fractions)\n", + "gt_sim" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_calls = gt_sim.shape[0] * gt_sim.shape[1]\n", + "n_calls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gt_sim_flat = gt_sim.reshape(-1, 2)\n", + "gt_sim_flat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "loc_missing = np.random.choice(\n", + " n_calls,\n", + " size=int(missing_fraction * n_calls),\n", + " replace=False,\n", + ")\n", + "loc_missing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gt_sim_flat[loc_missing] = -1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gt_sim" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.bincount(gt_sim.flatten() + 1) / gt_sim.size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.bincount(gt.flatten() + 1) / gt.size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allel.GenotypeArray(gt).count_missing()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allel.GenotypeArray(gt_sim).count_missing()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allel.GenotypeArray(gt).count_hom_ref()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allel.GenotypeArray(gt_sim).count_hom_ref()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allel.GenotypeArray(gt).count_hom_alt()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allel.GenotypeArray(gt_sim).count_hom_alt()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gq = ds2[\"call_GQ\"].values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "px.histogram(gq.flatten()[:100_000])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gqf = gq.flatten()\n", + "n_gq = np.bincount(gqf[gqf >= 0])\n", + "n_gq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p_gq = np.bincount(gqf[gqf >= 0]) / gqf.size\n", + "p_gq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "px.bar(p_gq)" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "mq = ds2[\"call_MQ\"].values\n", + "mq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mqf = mq.flatten()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "px.histogram(mqf[:100_000])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p_mq = np.bincount(mqf[mqf >= 0]) / mqf.size\n", + "p_mq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "px.bar(p_mq)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ad = ds2[\"call_AD\"].values\n", + "ad" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ad0 = ad[:, :, 0].flatten()\n", + "ad0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ad1 = ad[:, :, 1].flatten()\n", + "ad1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ad2 = ad[:, :, 2].flatten()\n", + "ad3 = ad[:, :, 3].flatten()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "px.histogram(ad0[:10_000])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "px.histogram(ad1[:10_000])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "px.histogram(ad2[:10_000])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p_ad0 = np.bincount(ad0[ad0 >= 0]) / ad0.size\n", + "px.bar(p_ad0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p_ad1 = np.bincount(ad1[ad1 >= 2]) / ad1.size\n", + "px.bar(p_ad1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p_ad2 = np.bincount(ad2[ad2 >= 2]) / ad1.size\n", + "px.bar(p_ad2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p_ad3 = np.bincount(ad3[ad3 >= 2]) / ad1.size\n", + "px.bar(p_ad3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pos = ds2[\"variant_position\"].values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "px.line(pos[:100_000])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "alleles = ds2[\"variant_allele\"].values[:10]\n", + "alleles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ref = alleles[:, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "alt_sim = 
np.empty(shape=(alleles.shape[0], alleles.shape[1] - 1), dtype=\"S1\")\n", + "alt_sim[ref == b\"A\"] = np.array([b\"C\", b\"T\", b\"G\"])\n", + "alt_sim[ref == b\"C\"] = np.array([b\"A\", b\"T\", b\"G\"])\n", + "alt_sim[ref == b\"T\"] = np.array([b\"A\", b\"C\", b\"G\"])\n", + "alt_sim[ref == b\"G\"] = np.array([b\"A\", b\"C\", b\"T\"])\n", + "alt_sim" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pass_gc = ds2[\"variant_filter_pass_gamb_colu\"].values\n", + "p_pass_gc = np.sum(pass_gc) / pass_gc.size\n", + "p_pass_gc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pass_a = ds2[\"variant_filter_pass_arab\"].values\n", + "p_pass_a = np.sum(pass_a) / pass_a.size\n", + "p_pass_a" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pass_gca = ds2[\"variant_filter_pass_gamb_colu_arab\"].values\n", + "p_pass_gca = np.sum(pass_gca) / pass_gca.size\n", + "p_pass_gca" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sequence composition" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ag_seq = ag3.genome_sequence(\"3L\").compute()\n", + "ag_seq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import Counter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ag_seq_count = Counter(ag_seq)\n", + "ag_seq_count" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filter_pass = af1.site_filters(region=\"3RL:10,000,000-11,000,000\", mask=\"funestus\")\n", + "filter_pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.sum(filter_pass).compute() / filter_pass.size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "af_seq = af1.genome_sequence(\"3RL\")[:60_000_000].compute()\n", + "af_seq" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bases = np.array([b\"a\", b\"c\", b\"g\", b\"t\", b\"n\", b\"A\", b\"C\", b\"G\", b\"T\", b\"N\"])\n", + "bases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "af_seq_count = Counter(af_seq)\n", + "af_seq_count" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p_bases_af = {b: af_seq_count[b] / af_seq.size for b in bases}\n", + "p_bases_af" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p_ad0 = np.bincount(ad0[ad0 >= 0]) / ad0.size\n", + "px.bar(p_ad0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p_bases_ag = {b: ag_seq_count[b] / ag_seq.size for b in bases}\n", + "p_bases_ag" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "p_bases_ag = np.array([ag_seq_count[b] for b in bases]) / ag_seq.size\n", + "p_bases_ag" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p_bases_ag.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "seq_sim = np.random.choice(bases, size=ag_seq.size, replace=True, p=p_bases_ag)\n", + "seq_sim" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "af1.sample_sets()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = af1.snp_calls(\n", + " region=\"3RL:10_000_000-10_500_000\", sample_sets=\"1230-VO-GA-CF-AYALA-VMF00045\"\n", + ")\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gt = ds[\"call_genotype\"].values\n", + "gt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allele_counts = np.bincount(gt.flatten() + 1)[1:]\n", + "allele_counts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allele_counts / np.sum(allele_counts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.sum(gt < 0) / gt.size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "malariagen-data-4GfTKESx-py3.8", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/poetry.lock b/poetry.lock index def1071f7..5da81bc26 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,16 +1,4 @@ -# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand. - -[[package]] -name = "aiofiles" -version = "22.1.0" -description = "File support for asyncio." -category = "dev" -optional = false -python-versions = ">=3.7,<4.0" -files = [ - {file = "aiofiles-22.1.0-py3-none-any.whl", hash = "sha256:1142fa8e80dbae46bb6339573ad4c8c0841358f79c6eb50a493dceca14621bad"}, - {file = "aiofiles-22.1.0.tar.gz", hash = "sha256:9107f1ca0b2a5553987a94a3c9959fe5b491fdf731389aa5b7b1bd0733e32de6"}, -] +# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. 
[[package]] name = "aiohttp" @@ -136,18 +124,6 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" -[[package]] -name = "aiosqlite" -version = "0.18.0" -description = "asyncio bridge to the standard sqlite3 module" -category = "dev" -optional = false -python-versions = ">=3.7" -files = [ - {file = "aiosqlite-0.18.0-py3-none-any.whl", hash = "sha256:c3511b841e3a2c5614900ba1d179f366826857586f78abd75e7cbeb88e75a557"}, - {file = "aiosqlite-0.18.0.tar.gz", hash = "sha256:faa843ef5fb08bafe9a9b3859012d3d9d6f77ce3637899de20606b7fc39aa213"}, -] - [[package]] name = "ansi2html" version = "1.8.0" @@ -299,6 +275,21 @@ six = "*" [package.extras] test = ["astroid", "pytest"] +[[package]] +name = "async-lru" +version = "2.0.2" +description = "Simple LRU cache for asyncio" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "async-lru-2.0.2.tar.gz", hash = "sha256:3b87ec4f2460c52cc7916a0138cc606b584c75d1ef7d661853c95d1d3acb869a"}, + {file = "async_lru-2.0.2-py3-none-any.whl", hash = "sha256:d7c2b873e9af5c5a1f0a87a6c145e7e0b4eb92342b7235dda9dd5b10e950d6e2"}, +] + +[package.dependencies] +typing-extensions = ">=4.0.0" + [[package]] name = "async-timeout" version = "4.0.2" @@ -313,22 +304,22 @@ files = [ [[package]] name = "attrs" -version = "22.2.0" +version = "23.1.0" description = "Classes Without Boilerplate" category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, - {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, + {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, + {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, ] [package.extras] -cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] -dev = ["attrs[docs,tests]"] -docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"] -tests = ["attrs[tests-no-zope]", "zope.interface"] -tests-no-zope = ["cloudpickle", "cloudpickle", "hypothesis", "hypothesis", "mypy (>=0.971,<0.990)", "mypy (>=0.971,<0.990)", "pympler", "pympler", "pytest (>=4.3.0)", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-mypy-plugins", "pytest-xdist[psutil]", "pytest-xdist[psutil]"] +cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]", "pre-commit"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] +tests = ["attrs[tests-no-zope]", "zope-interface"] +tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] [[package]] name = "babel" @@ -359,14 +350,14 @@ files = [ [[package]] name = "beautifulsoup4" -version = "4.12.0" +version = "4.12.2" description = "Screen-scraping library" category = "dev" optional = false python-versions = ">=3.6.0" files = [ - {file = "beautifulsoup4-4.12.0-py3-none-any.whl", hash = "sha256:2130a5ad7f513200fae61a17abb5e338ca980fa28c439c0571014bc0217e9591"}, - {file = "beautifulsoup4-4.12.0.tar.gz", hash = "sha256:c5fceeaec29d09c84970e47c65f2f0efe57872f7cff494c9691a26ec0ff13234"}, + {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = 
"sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, + {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, ] [package.dependencies] @@ -482,6 +473,18 @@ webencodings = "*" [package.extras] css = ["tinycss2 (>=1.1.0,<1.2)"] +[[package]] +name = "blinker" +version = "1.6.2" +description = "Fast, simple object-to-object and broadcast signaling" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "blinker-1.6.2-py3-none-any.whl", hash = "sha256:c3d739772abb7bc2860abf5f2ec284223d9ad5c76da018234f6f50d6f31ab1f0"}, + {file = "blinker-1.6.2.tar.gz", hash = "sha256:4afd3de66ef3a9f8067559fb7a1cbe555c17dcbe15971b05d1b625c3e7abe213"}, +] + [[package]] name = "bokeh" version = "2.4.3" @@ -532,14 +535,14 @@ files = [ [[package]] name = "certifi" -version = "2022.12.7" +version = "2023.5.7" description = "Python package for providing Mozilla's CA Bundle." category = "main" optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"}, - {file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"}, + {file = "certifi-2023.5.7-py3-none-any.whl", hash = "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716"}, + {file = "certifi-2023.5.7.tar.gz", hash = "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7"}, ] [[package]] @@ -777,63 +780,63 @@ typing = ["mypy (>=0.990)"] [[package]] name = "coverage" -version = "7.2.2" +version = "7.2.5" description = "Code coverage measurement for Python" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "coverage-7.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c90e73bdecb7b0d1cea65a08cb41e9d672ac6d7995603d6465ed4914b98b9ad7"}, - {file = "coverage-7.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e2926b8abedf750c2ecf5035c07515770944acf02e1c46ab08f6348d24c5f94d"}, - {file = "coverage-7.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57b77b9099f172804e695a40ebaa374f79e4fb8b92f3e167f66facbf92e8e7f5"}, - {file = "coverage-7.2.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:efe1c0adad110bf0ad7fb59f833880e489a61e39d699d37249bdf42f80590169"}, - {file = "coverage-7.2.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2199988e0bc8325d941b209f4fd1c6fa007024b1442c5576f1a32ca2e48941e6"}, - {file = "coverage-7.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:81f63e0fb74effd5be736cfe07d710307cc0a3ccb8f4741f7f053c057615a137"}, - {file = "coverage-7.2.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:186e0fc9cf497365036d51d4d2ab76113fb74f729bd25da0975daab2e107fd90"}, - {file = "coverage-7.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:420f94a35e3e00a2b43ad5740f935358e24478354ce41c99407cddd283be00d2"}, - {file = "coverage-7.2.2-cp310-cp310-win32.whl", hash = "sha256:38004671848b5745bb05d4d621526fca30cee164db42a1f185615f39dc997292"}, - {file = "coverage-7.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:0ce383d5f56d0729d2dd40e53fe3afeb8f2237244b0975e1427bfb2cf0d32bab"}, - {file = "coverage-7.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3eb55b7b26389dd4f8ae911ba9bc8c027411163839dea4c8b8be54c4ee9ae10b"}, - {file = 
"coverage-7.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d2b96123a453a2d7f3995ddb9f28d01fd112319a7a4d5ca99796a7ff43f02af5"}, - {file = "coverage-7.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:299bc75cb2a41e6741b5e470b8c9fb78d931edbd0cd009c58e5c84de57c06731"}, - {file = "coverage-7.2.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5e1df45c23d4230e3d56d04414f9057eba501f78db60d4eeecfcb940501b08fd"}, - {file = "coverage-7.2.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:006ed5582e9cbc8115d2e22d6d2144a0725db542f654d9d4fda86793832f873d"}, - {file = "coverage-7.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d683d230b5774816e7d784d7ed8444f2a40e7a450e5720d58af593cb0b94a212"}, - {file = "coverage-7.2.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8efb48fa743d1c1a65ee8787b5b552681610f06c40a40b7ef94a5b517d885c54"}, - {file = "coverage-7.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4c752d5264053a7cf2fe81c9e14f8a4fb261370a7bb344c2a011836a96fb3f57"}, - {file = "coverage-7.2.2-cp311-cp311-win32.whl", hash = "sha256:55272f33da9a5d7cccd3774aeca7a01e500a614eaea2a77091e9be000ecd401d"}, - {file = "coverage-7.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:92ebc1619650409da324d001b3a36f14f63644c7f0a588e331f3b0f67491f512"}, - {file = "coverage-7.2.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5afdad4cc4cc199fdf3e18088812edcf8f4c5a3c8e6cb69127513ad4cb7471a9"}, - {file = "coverage-7.2.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0484d9dd1e6f481b24070c87561c8d7151bdd8b044c93ac99faafd01f695c78e"}, - {file = "coverage-7.2.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d530191aa9c66ab4f190be8ac8cc7cfd8f4f3217da379606f3dd4e3d83feba69"}, - {file = "coverage-7.2.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ac0f522c3b6109c4b764ffec71bf04ebc0523e926ca7cbe6c5ac88f84faced0"}, - {file = "coverage-7.2.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ba279aae162b20444881fc3ed4e4f934c1cf8620f3dab3b531480cf602c76b7f"}, - {file = "coverage-7.2.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:53d0fd4c17175aded9c633e319360d41a1f3c6e352ba94edcb0fa5167e2bad67"}, - {file = "coverage-7.2.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c99cb7c26a3039a8a4ee3ca1efdde471e61b4837108847fb7d5be7789ed8fd9"}, - {file = "coverage-7.2.2-cp37-cp37m-win32.whl", hash = "sha256:5cc0783844c84af2522e3a99b9b761a979a3ef10fb87fc4048d1ee174e18a7d8"}, - {file = "coverage-7.2.2-cp37-cp37m-win_amd64.whl", hash = "sha256:817295f06eacdc8623dc4df7d8b49cea65925030d4e1e2a7c7218380c0072c25"}, - {file = "coverage-7.2.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6146910231ece63facfc5984234ad1b06a36cecc9fd0c028e59ac7c9b18c38c6"}, - {file = "coverage-7.2.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:387fb46cb8e53ba7304d80aadca5dca84a2fbf6fe3faf6951d8cf2d46485d1e5"}, - {file = "coverage-7.2.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:046936ab032a2810dcaafd39cc4ef6dd295df1a7cbead08fe996d4765fca9fe4"}, - {file = "coverage-7.2.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e627dee428a176ffb13697a2c4318d3f60b2ccdde3acdc9b3f304206ec130ccd"}, - {file = 
"coverage-7.2.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fa54fb483decc45f94011898727802309a109d89446a3c76387d016057d2c84"}, - {file = "coverage-7.2.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3668291b50b69a0c1ef9f462c7df2c235da3c4073f49543b01e7eb1dee7dd540"}, - {file = "coverage-7.2.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7c20b731211261dc9739bbe080c579a1835b0c2d9b274e5fcd903c3a7821cf88"}, - {file = "coverage-7.2.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5764e1f7471cb8f64b8cda0554f3d4c4085ae4b417bfeab236799863703e5de2"}, - {file = "coverage-7.2.2-cp38-cp38-win32.whl", hash = "sha256:4f01911c010122f49a3e9bdc730eccc66f9b72bd410a3a9d3cb8448bb50d65d3"}, - {file = "coverage-7.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:c448b5c9e3df5448a362208b8d4b9ed85305528313fca1b479f14f9fe0d873b8"}, - {file = "coverage-7.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bfe7085783cda55e53510482fa7b5efc761fad1abe4d653b32710eb548ebdd2d"}, - {file = "coverage-7.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9d22e94e6dc86de981b1b684b342bec5e331401599ce652900ec59db52940005"}, - {file = "coverage-7.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:507e4720791977934bba016101579b8c500fb21c5fa3cd4cf256477331ddd988"}, - {file = "coverage-7.2.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bc4803779f0e4b06a2361f666e76f5c2e3715e8e379889d02251ec911befd149"}, - {file = "coverage-7.2.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db8c2c5ace167fd25ab5dd732714c51d4633f58bac21fb0ff63b0349f62755a8"}, - {file = "coverage-7.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4f68ee32d7c4164f1e2c8797535a6d0a3733355f5861e0f667e37df2d4b07140"}, - {file = "coverage-7.2.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d52f0a114b6a58305b11a5cdecd42b2e7f1ec77eb20e2b33969d702feafdd016"}, - {file = "coverage-7.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:797aad79e7b6182cb49c08cc5d2f7aa7b2128133b0926060d0a8889ac43843be"}, - {file = "coverage-7.2.2-cp39-cp39-win32.whl", hash = "sha256:db45eec1dfccdadb179b0f9ca616872c6f700d23945ecc8f21bb105d74b1c5fc"}, - {file = "coverage-7.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:8dbe2647bf58d2c5a6c5bcc685f23b5f371909a5624e9f5cd51436d6a9f6c6ef"}, - {file = "coverage-7.2.2-pp37.pp38.pp39-none-any.whl", hash = "sha256:872d6ce1f5be73f05bea4df498c140b9e7ee5418bfa2cc8204e7f9b817caa968"}, - {file = "coverage-7.2.2.tar.gz", hash = "sha256:36dd42da34fe94ed98c39887b86db9d06777b1c8f860520e21126a75507024f2"}, + {file = "coverage-7.2.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:883123d0bbe1c136f76b56276074b0c79b5817dd4238097ffa64ac67257f4b6c"}, + {file = "coverage-7.2.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d2fbc2a127e857d2f8898aaabcc34c37771bf78a4d5e17d3e1f5c30cd0cbc62a"}, + {file = "coverage-7.2.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f3671662dc4b422b15776cdca89c041a6349b4864a43aa2350b6b0b03bbcc7f"}, + {file = "coverage-7.2.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780551e47d62095e088f251f5db428473c26db7829884323e56d9c0c3118791a"}, + {file = "coverage-7.2.5-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:066b44897c493e0dcbc9e6a6d9f8bbb6607ef82367cf6810d387c09f0cd4fe9a"}, + {file = "coverage-7.2.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b9a4ee55174b04f6af539218f9f8083140f61a46eabcaa4234f3c2a452c4ed11"}, + {file = "coverage-7.2.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:706ec567267c96717ab9363904d846ec009a48d5f832140b6ad08aad3791b1f5"}, + {file = "coverage-7.2.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ae453f655640157d76209f42c62c64c4d4f2c7f97256d3567e3b439bd5c9b06c"}, + {file = "coverage-7.2.5-cp310-cp310-win32.whl", hash = "sha256:f81c9b4bd8aa747d417407a7f6f0b1469a43b36a85748145e144ac4e8d303cb5"}, + {file = "coverage-7.2.5-cp310-cp310-win_amd64.whl", hash = "sha256:dc945064a8783b86fcce9a0a705abd7db2117d95e340df8a4333f00be5efb64c"}, + {file = "coverage-7.2.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:40cc0f91c6cde033da493227797be2826cbf8f388eaa36a0271a97a332bfd7ce"}, + {file = "coverage-7.2.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a66e055254a26c82aead7ff420d9fa8dc2da10c82679ea850d8feebf11074d88"}, + {file = "coverage-7.2.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c10fbc8a64aa0f3ed136b0b086b6b577bc64d67d5581acd7cc129af52654384e"}, + {file = "coverage-7.2.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9a22cbb5ede6fade0482111fa7f01115ff04039795d7092ed0db43522431b4f2"}, + {file = "coverage-7.2.5-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:292300f76440651529b8ceec283a9370532f4ecba9ad67d120617021bb5ef139"}, + {file = "coverage-7.2.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7ff8f3fb38233035028dbc93715551d81eadc110199e14bbbfa01c5c4a43f8d8"}, + {file = "coverage-7.2.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:a08c7401d0b24e8c2982f4e307124b671c6736d40d1c39e09d7a8687bddf83ed"}, + {file = "coverage-7.2.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ef9659d1cda9ce9ac9585c045aaa1e59223b143f2407db0eaee0b61a4f266fb6"}, + {file = "coverage-7.2.5-cp311-cp311-win32.whl", hash = "sha256:30dcaf05adfa69c2a7b9f7dfd9f60bc8e36b282d7ed25c308ef9e114de7fc23b"}, + {file = "coverage-7.2.5-cp311-cp311-win_amd64.whl", hash = "sha256:97072cc90f1009386c8a5b7de9d4fc1a9f91ba5ef2146c55c1f005e7b5c5e068"}, + {file = "coverage-7.2.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:bebea5f5ed41f618797ce3ffb4606c64a5de92e9c3f26d26c2e0aae292f015c1"}, + {file = "coverage-7.2.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:828189fcdda99aae0d6bf718ea766b2e715eabc1868670a0a07bf8404bf58c33"}, + {file = "coverage-7.2.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e8a95f243d01ba572341c52f89f3acb98a3b6d1d5d830efba86033dd3687ade"}, + {file = "coverage-7.2.5-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8834e5f17d89e05697c3c043d3e58a8b19682bf365048837383abfe39adaed5"}, + {file = "coverage-7.2.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d1f25ee9de21a39b3a8516f2c5feb8de248f17da7eead089c2e04aa097936b47"}, + {file = "coverage-7.2.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1637253b11a18f453e34013c665d8bf15904c9e3c44fbda34c643fbdc9d452cd"}, + {file = "coverage-7.2.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8e575a59315a91ccd00c7757127f6b2488c2f914096077c745c2f1ba5b8c0969"}, + {file = 
"coverage-7.2.5-cp37-cp37m-win32.whl", hash = "sha256:509ecd8334c380000d259dc66feb191dd0a93b21f2453faa75f7f9cdcefc0718"}, + {file = "coverage-7.2.5-cp37-cp37m-win_amd64.whl", hash = "sha256:12580845917b1e59f8a1c2ffa6af6d0908cb39220f3019e36c110c943dc875b0"}, + {file = "coverage-7.2.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b5016e331b75310610c2cf955d9f58a9749943ed5f7b8cfc0bb89c6134ab0a84"}, + {file = "coverage-7.2.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:373ea34dca98f2fdb3e5cb33d83b6d801007a8074f992b80311fc589d3e6b790"}, + {file = "coverage-7.2.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a063aad9f7b4c9f9da7b2550eae0a582ffc7623dca1c925e50c3fbde7a579771"}, + {file = "coverage-7.2.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38c0a497a000d50491055805313ed83ddba069353d102ece8aef5d11b5faf045"}, + {file = "coverage-7.2.5-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2b3b05e22a77bb0ae1a3125126a4e08535961c946b62f30985535ed40e26614"}, + {file = "coverage-7.2.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0342a28617e63ad15d96dca0f7ae9479a37b7d8a295f749c14f3436ea59fdcb3"}, + {file = "coverage-7.2.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:cf97ed82ca986e5c637ea286ba2793c85325b30f869bf64d3009ccc1a31ae3fd"}, + {file = "coverage-7.2.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c2c41c1b1866b670573657d584de413df701f482574bad7e28214a2362cb1fd1"}, + {file = "coverage-7.2.5-cp38-cp38-win32.whl", hash = "sha256:10b15394c13544fce02382360cab54e51a9e0fd1bd61ae9ce012c0d1e103c813"}, + {file = "coverage-7.2.5-cp38-cp38-win_amd64.whl", hash = "sha256:a0b273fe6dc655b110e8dc89b8ec7f1a778d78c9fd9b4bda7c384c8906072212"}, + {file = "coverage-7.2.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5c587f52c81211d4530fa6857884d37f514bcf9453bdeee0ff93eaaf906a5c1b"}, + {file = "coverage-7.2.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4436cc9ba5414c2c998eaedee5343f49c02ca93b21769c5fdfa4f9d799e84200"}, + {file = "coverage-7.2.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6599bf92f33ab041e36e06d25890afbdf12078aacfe1f1d08c713906e49a3fe5"}, + {file = "coverage-7.2.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:857abe2fa6a4973f8663e039ead8d22215d31db613ace76e4a98f52ec919068e"}, + {file = "coverage-7.2.5-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6f5cab2d7f0c12f8187a376cc6582c477d2df91d63f75341307fcdcb5d60303"}, + {file = "coverage-7.2.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:aa387bd7489f3e1787ff82068b295bcaafbf6f79c3dad3cbc82ef88ce3f48ad3"}, + {file = "coverage-7.2.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:156192e5fd3dbbcb11cd777cc469cf010a294f4c736a2b2c891c77618cb1379a"}, + {file = "coverage-7.2.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bd3b4b8175c1db502adf209d06136c000df4d245105c8839e9d0be71c94aefe1"}, + {file = "coverage-7.2.5-cp39-cp39-win32.whl", hash = "sha256:ddc5a54edb653e9e215f75de377354e2455376f416c4378e1d43b08ec50acc31"}, + {file = "coverage-7.2.5-cp39-cp39-win_amd64.whl", hash = "sha256:338aa9d9883aaaad53695cb14ccdeb36d4060485bb9388446330bef9c361c252"}, + {file = "coverage-7.2.5-pp37.pp38.pp39-none-any.whl", hash = "sha256:8877d9b437b35a85c18e3c6499b23674684bf690f5d96c1006a1ef61f9fdf0f3"}, + {file = 
"coverage-7.2.5.tar.gz", hash = "sha256:f99ef080288f09ffc687423b8d60978cf3a465d3f404a18d1a05474bd8575a47"}, ] [package.dependencies] @@ -844,14 +847,14 @@ toml = ["tomli"] [[package]] name = "dash" -version = "2.9.2" +version = "2.9.3" description = "A Python framework for building reactive web-apps. Developed by Plotly." category = "main" optional = false python-versions = ">=3.6" files = [ - {file = "dash-2.9.2-py3-none-any.whl", hash = "sha256:f54c0ce81defa92b429f967829f8a5ccdb0dc8208d4e4e96f7149a016c820071"}, - {file = "dash-2.9.2.tar.gz", hash = "sha256:ee3d244f560e58300d807f820f7854f8f9d3b482d0c99ddb3ff7cb4dc3be1346"}, + {file = "dash-2.9.3-py3-none-any.whl", hash = "sha256:a749ae1ea9de3fe7b785353a818ec9b629d39c6b7e02462954203bd1e296fd0e"}, + {file = "dash-2.9.3.tar.gz", hash = "sha256:47392f8d6455dc989a697407eb5941f3bad80604df985ab1ac9d4244568ffb34"}, ] [package.dependencies] @@ -922,60 +925,61 @@ files = [ [[package]] name = "dask" -version = "2023.3.2" +version = "2023.5.0" description = "Parallel PyData with Task Scheduling" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "dask-2023.3.2-py3-none-any.whl", hash = "sha256:5e64763d62feb18afd3ad66f364e0b4f456f7ac92e894fcc87950af75029ecdf"}, - {file = "dask-2023.3.2.tar.gz", hash = "sha256:51009e92ba9a280bd417633d1ae84f3ed23a8940f0a19594a4b7797ef226fff4"}, + {file = "dask-2023.5.0-py3-none-any.whl", hash = "sha256:32b34986519b7ddc0947c8ca63c2fc81b964e4c208dfb5cbf9f4f8aec92d152b"}, + {file = "dask-2023.5.0.tar.gz", hash = "sha256:4f4c28ac406e81b8f21b5be4b31b21308808f3e0e7c7e2f4a914f16476d9941b"}, ] [package.dependencies] -click = ">=7.0" -cloudpickle = ">=1.1.1" -fsspec = ">=0.6.0" +click = ">=8.0" +cloudpickle = ">=1.5.0" +fsspec = ">=2021.09.0" importlib-metadata = ">=4.13.0" numpy = {version = ">=1.21", optional = true, markers = "extra == \"array\""} packaging = ">=20.0" partd = ">=1.2.0" pyyaml = ">=5.3.1" -toolz = ">=0.8.2" +toolz = ">=0.10.0" [package.extras] array = ["numpy (>=1.21)"] -complete = ["bokeh (>=2.4.2,<3)", "distributed (==2023.3.2)", "jinja2 (>=2.10.3)", "lz4 (>=4.3.2)", "numpy (>=1.21)", "pandas (>=1.3)", "pyarrow (>=7.0)"] +complete = ["dask[array,dataframe,diagnostics,distributed]", "lz4 (>=4.3.2)", "pyarrow (>=7.0)"] dataframe = ["numpy (>=1.21)", "pandas (>=1.3)"] -diagnostics = ["bokeh (>=2.4.2,<3)", "jinja2 (>=2.10.3)"] -distributed = ["distributed (==2023.3.2)"] +diagnostics = ["bokeh (>=2.4.2)", "jinja2 (>=2.10.3)"] +distributed = ["distributed (==2023.5.0)"] test = ["pandas[test]", "pre-commit", "pytest", "pytest-rerunfailures", "pytest-xdist"] [[package]] name = "debugpy" -version = "1.6.6" +version = "1.6.7" description = "An implementation of the Debug Adapter Protocol for Python" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "debugpy-1.6.6-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:0ea1011e94416e90fb3598cc3ef5e08b0a4dd6ce6b9b33ccd436c1dffc8cd664"}, - {file = "debugpy-1.6.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dff595686178b0e75580c24d316aa45a8f4d56e2418063865c114eef651a982e"}, - {file = "debugpy-1.6.6-cp310-cp310-win32.whl", hash = "sha256:87755e173fcf2ec45f584bb9d61aa7686bb665d861b81faa366d59808bbd3494"}, - {file = "debugpy-1.6.6-cp310-cp310-win_amd64.whl", hash = "sha256:72687b62a54d9d9e3fb85e7a37ea67f0e803aaa31be700e61d2f3742a5683917"}, - {file = "debugpy-1.6.6-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:78739f77c58048ec006e2b3eb2e0cd5a06d5f48c915e2fc7911a337354508110"}, - 
{file = "debugpy-1.6.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23c29e40e39ad7d869d408ded414f6d46d82f8a93b5857ac3ac1e915893139ca"}, - {file = "debugpy-1.6.6-cp37-cp37m-win32.whl", hash = "sha256:7aa7e103610e5867d19a7d069e02e72eb2b3045b124d051cfd1538f1d8832d1b"}, - {file = "debugpy-1.6.6-cp37-cp37m-win_amd64.whl", hash = "sha256:f6383c29e796203a0bba74a250615ad262c4279d398e89d895a69d3069498305"}, - {file = "debugpy-1.6.6-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:23363e6d2a04d726bbc1400bd4e9898d54419b36b2cdf7020e3e215e1dcd0f8e"}, - {file = "debugpy-1.6.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b5d1b13d7c7bf5d7cf700e33c0b8ddb7baf030fcf502f76fc061ddd9405d16c"}, - {file = "debugpy-1.6.6-cp38-cp38-win32.whl", hash = "sha256:70ab53918fd907a3ade01909b3ed783287ede362c80c75f41e79596d5ccacd32"}, - {file = "debugpy-1.6.6-cp38-cp38-win_amd64.whl", hash = "sha256:c05349890804d846eca32ce0623ab66c06f8800db881af7a876dc073ac1c2225"}, - {file = "debugpy-1.6.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a771739902b1ae22a120dbbb6bd91b2cae6696c0e318b5007c5348519a4211c6"}, - {file = "debugpy-1.6.6-cp39-cp39-win32.whl", hash = "sha256:549ae0cb2d34fc09d1675f9b01942499751d174381b6082279cf19cdb3c47cbe"}, - {file = "debugpy-1.6.6-cp39-cp39-win_amd64.whl", hash = "sha256:de4a045fbf388e120bb6ec66501458d3134f4729faed26ff95de52a754abddb1"}, - {file = "debugpy-1.6.6-py2.py3-none-any.whl", hash = "sha256:be596b44448aac14eb3614248c91586e2bc1728e020e82ef3197189aae556115"}, - {file = "debugpy-1.6.6.zip", hash = "sha256:b9c2130e1c632540fbf9c2c88341493797ddf58016e7cba02e311de9b0a96b67"}, + {file = "debugpy-1.6.7-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b3e7ac809b991006ad7f857f016fa92014445085711ef111fdc3f74f66144096"}, + {file = "debugpy-1.6.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3876611d114a18aafef6383695dfc3f1217c98a9168c1aaf1a02b01ec7d8d1e"}, + {file = "debugpy-1.6.7-cp310-cp310-win32.whl", hash = "sha256:33edb4afa85c098c24cc361d72ba7c21bb92f501104514d4ffec1fb36e09c01a"}, + {file = "debugpy-1.6.7-cp310-cp310-win_amd64.whl", hash = "sha256:ed6d5413474e209ba50b1a75b2d9eecf64d41e6e4501977991cdc755dc83ab0f"}, + {file = "debugpy-1.6.7-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:38ed626353e7c63f4b11efad659be04c23de2b0d15efff77b60e4740ea685d07"}, + {file = "debugpy-1.6.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:279d64c408c60431c8ee832dfd9ace7c396984fd7341fa3116aee414e7dcd88d"}, + {file = "debugpy-1.6.7-cp37-cp37m-win32.whl", hash = "sha256:dbe04e7568aa69361a5b4c47b4493d5680bfa3a911d1e105fbea1b1f23f3eb45"}, + {file = "debugpy-1.6.7-cp37-cp37m-win_amd64.whl", hash = "sha256:f90a2d4ad9a035cee7331c06a4cf2245e38bd7c89554fe3b616d90ab8aab89cc"}, + {file = "debugpy-1.6.7-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:5224eabbbeddcf1943d4e2821876f3e5d7d383f27390b82da5d9558fd4eb30a9"}, + {file = "debugpy-1.6.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bae1123dff5bfe548ba1683eb972329ba6d646c3a80e6b4c06cd1b1dd0205e9b"}, + {file = "debugpy-1.6.7-cp38-cp38-win32.whl", hash = "sha256:9cd10cf338e0907fdcf9eac9087faa30f150ef5445af5a545d307055141dd7a4"}, + {file = "debugpy-1.6.7-cp38-cp38-win_amd64.whl", hash = "sha256:aaf6da50377ff4056c8ed470da24632b42e4087bc826845daad7af211e00faad"}, + {file = "debugpy-1.6.7-cp39-cp39-macosx_11_0_x86_64.whl", hash = 
"sha256:0679b7e1e3523bd7d7869447ec67b59728675aadfc038550a63a362b63029d2c"}, + {file = "debugpy-1.6.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de86029696e1b3b4d0d49076b9eba606c226e33ae312a57a46dca14ff370894d"}, + {file = "debugpy-1.6.7-cp39-cp39-win32.whl", hash = "sha256:d71b31117779d9a90b745720c0eab54ae1da76d5b38c8026c654f4a066b0130a"}, + {file = "debugpy-1.6.7-cp39-cp39-win_amd64.whl", hash = "sha256:c0ff93ae90a03b06d85b2c529eca51ab15457868a377c4cc40a23ab0e4e552a3"}, + {file = "debugpy-1.6.7-py2.py3-none-any.whl", hash = "sha256:53f7a456bc50706a0eaabecf2d3ce44c4d5010e46dfc65b6b81a518b42866267"}, + {file = "debugpy-1.6.7.zip", hash = "sha256:c4c2f0810fa25323abfdfa36cbbbb24e5c3b1a42cb762782de64439c575d67f2"}, ] [[package]] @@ -1115,38 +1119,39 @@ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benc [[package]] name = "filelock" -version = "3.10.7" +version = "3.12.0" description = "A platform independent file lock." category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "filelock-3.10.7-py3-none-any.whl", hash = "sha256:bde48477b15fde2c7e5a0713cbe72721cb5a5ad32ee0b8f419907960b9d75536"}, - {file = "filelock-3.10.7.tar.gz", hash = "sha256:892be14aa8efc01673b5ed6589dbccb95f9a8596f0507e232626155495c18105"}, + {file = "filelock-3.12.0-py3-none-any.whl", hash = "sha256:ad98852315c2ab702aeb628412cbf7e95b7ce8c3bf9565670b4eaecf1db370a9"}, + {file = "filelock-3.12.0.tar.gz", hash = "sha256:fc03ae43288c013d2ea83c8597001b1129db351aad9c57fe2409327916b8e718"}, ] [package.extras] -docs = ["furo (>=2022.12.7)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.22,!=1.23.4)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.2.2)", "diff-cover (>=7.5)", "pytest (>=7.2.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"] +docs = ["furo (>=2023.3.27)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "diff-cover (>=7.5)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"] [[package]] name = "flask" -version = "2.2.3" +version = "2.3.2" description = "A simple framework for building complex web applications." 
category = "main" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "Flask-2.2.3-py3-none-any.whl", hash = "sha256:c0bec9477df1cb867e5a67c9e1ab758de9cb4a3e52dd70681f59fa40a62b3f2d"}, - {file = "Flask-2.2.3.tar.gz", hash = "sha256:7eb373984bf1c770023fce9db164ed0c3353cd0b53f130f4693da0ca756a2e6d"}, + {file = "Flask-2.3.2-py3-none-any.whl", hash = "sha256:77fd4e1249d8c9923de34907236b747ced06e5467ecac1a7bb7115ae0e9670b0"}, + {file = "Flask-2.3.2.tar.gz", hash = "sha256:8c2f9abd47a9e8df7f0c3f091ce9497d011dc3b31effcf4c85a6e2b50f4114ef"}, ] [package.dependencies] -click = ">=8.0" +blinker = ">=1.6.2" +click = ">=8.1.3" importlib-metadata = {version = ">=3.6.0", markers = "python_version < \"3.10\""} -itsdangerous = ">=2.0" -Jinja2 = ">=3.0" -Werkzeug = ">=2.2.2" +itsdangerous = ">=2.1.2" +Jinja2 = ">=3.1.2" +Werkzeug = ">=2.3.3" [package.extras] async = ["asgiref (>=3.2)"] @@ -1250,14 +1255,14 @@ files = [ [[package]] name = "fsspec" -version = "2023.3.0" +version = "2023.5.0" description = "File-system specification" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "fsspec-2023.3.0-py3-none-any.whl", hash = "sha256:bf57215e19dbfa4fe7edae53040cc1deef825e3b1605cca9a8d2c2fadd2328a0"}, - {file = "fsspec-2023.3.0.tar.gz", hash = "sha256:24e635549a590d74c6c18274ddd3ffab4753341753e923408b1904eaabafe04d"}, + {file = "fsspec-2023.5.0-py3-none-any.whl", hash = "sha256:51a4ad01a5bb66fcc58036e288c0d53d3975a0df2a5dc59a93b59bade0391f2a"}, + {file = "fsspec-2023.5.0.tar.gz", hash = "sha256:b3b56e00fb93ea321bc9e5d9cf6f8522a0198b20eb24e02774d329e9c6fb84ce"}, ] [package.extras] @@ -1265,7 +1270,9 @@ abfs = ["adlfs"] adl = ["adlfs"] arrow = ["pyarrow (>=1)"] dask = ["dask", "distributed"] +devel = ["pytest", "pytest-cov"] dropbox = ["dropbox", "dropboxdrivefs", "requests"] +full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] fuse = ["fusepy"] gcs = ["gcsfs"] git = ["pygit2"] @@ -1284,20 +1291,20 @@ tqdm = ["tqdm"] [[package]] name = "gcsfs" -version = "2023.3.0" +version = "2023.5.0" description = "Convenient Filesystem interface over GCS" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "gcsfs-2023.3.0-py2.py3-none-any.whl", hash = "sha256:f76e09c436ca7319d6f7647654c891d8618f341932a6a00c845eee8fa00c53e5"}, - {file = "gcsfs-2023.3.0.tar.gz", hash = "sha256:28dc035b4de9f131ecc542ce402ad8f208093e5b733b8b8f58908a709eb54280"}, + {file = "gcsfs-2023.5.0-py2.py3-none-any.whl", hash = "sha256:4f2ebc41814de3f566f85dec208704cf19823b9d04a55fd12b3142aef9046525"}, + {file = "gcsfs-2023.5.0.tar.gz", hash = "sha256:02a815e1cf28197ab4f57335e89dc5df8744a065c7c956d42692b50a9e8f1625"}, ] [package.dependencies] aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" decorator = ">4.1.2" -fsspec = "2023.3.0" +fsspec = "2023.5.0" google-auth = ">=1.2" google-auth-oauthlib = "*" google-cloud-storage = "*" @@ -1332,14 +1339,14 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0dev)"] [[package]] name = "google-auth" -version = "2.17.1" +version = "2.18.0" description = "Google Authentication Library" category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" files = [ - {file = "google-auth-2.17.1.tar.gz", hash = "sha256:8f379b46bad381ad2a0b989dfb0c13ad28d3c2a79f27348213f8946a1d15d55a"}, - {file = 
"google_auth-2.17.1-py2.py3-none-any.whl", hash = "sha256:357ff22a75b4c0f6093470f21816a825d2adee398177569824e37b6c10069e19"}, + {file = "google-auth-2.18.0.tar.gz", hash = "sha256:c66b488a8b005b23ccb97b1198b6cece516c91869091ac5b7c267422db2733c7"}, + {file = "google_auth-2.18.0-py2.py3-none-any.whl", hash = "sha256:ef3f3a67fa54d421a1c155864570f9a8de9179cedc937bda496b7a8ca338e936"}, ] [package.dependencies] @@ -1347,6 +1354,7 @@ cachetools = ">=2.0.0,<6.0" pyasn1-modules = ">=0.2.1" rsa = {version = ">=3.1.4,<5", markers = "python_version >= \"3.6\""} six = ">=1.9.0" +urllib3 = "<2.0" [package.extras] aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)", "requests (>=2.20.0,<3.0.0dev)"] @@ -1395,14 +1403,14 @@ grpc = ["grpcio (>=1.38.0,<2.0dev)"] [[package]] name = "google-cloud-storage" -version = "2.8.0" +version = "2.9.0" description = "Google Cloud Storage API client library" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "google-cloud-storage-2.8.0.tar.gz", hash = "sha256:4388da1ff5bda6d729f26dbcaf1bfa020a2a52a7b91f0a8123edbda51660802c"}, - {file = "google_cloud_storage-2.8.0-py2.py3-none-any.whl", hash = "sha256:248e210c13bc109909160248af546a91cb2dabaf3d7ebbf04def9dd49f02dbb6"}, + {file = "google-cloud-storage-2.9.0.tar.gz", hash = "sha256:9b6ae7b509fc294bdacb84d0f3ea8e20e2c54a8b4bbe39c5707635fec214eff3"}, + {file = "google_cloud_storage-2.9.0-py2.py3-none-any.whl", hash = "sha256:83a90447f23d5edd045e0037982c270302e3aeb45fc1288d2c2ca713d27bad94"}, ] [package.dependencies] @@ -1498,14 +1506,14 @@ testing = ["pytest"] [[package]] name = "google-resumable-media" -version = "2.4.1" +version = "2.5.0" description = "Utilities for Google Media Downloads and Resumable Uploads" category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "google-resumable-media-2.4.1.tar.gz", hash = "sha256:15b8a2e75df42dc6502d1306db0bce2647ba6013f9cd03b6e17368c0886ee90a"}, - {file = "google_resumable_media-2.4.1-py2.py3-none-any.whl", hash = "sha256:831e86fd78d302c1a034730a0c6e5369dd11d37bad73fa69ca8998460d5bae8d"}, + {file = "google-resumable-media-2.5.0.tar.gz", hash = "sha256:218931e8e2b2a73a58eb354a288e03a0fd5fb1c4583261ac6e4c078666468c93"}, + {file = "google_resumable_media-2.5.0-py2.py3-none-any.whl", hash = "sha256:da1bd943e2e114a56d85d6848497ebf9be6a14d3db23e9fc57581e7c3e8170ec"}, ] [package.dependencies] @@ -1535,14 +1543,14 @@ grpc = ["grpcio (>=1.44.0,<2.0.0dev)"] [[package]] name = "identify" -version = "2.5.22" +version = "2.5.24" description = "File identification library for Python" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "identify-2.5.22-py2.py3-none-any.whl", hash = "sha256:f0faad595a4687053669c112004178149f6c326db71ee999ae4636685753ad2f"}, - {file = "identify-2.5.22.tar.gz", hash = "sha256:f7a93d6cf98e29bd07663c60728e7a4057615068d7a639d132dc883b2d54d31e"}, + {file = "identify-2.5.24-py2.py3-none-any.whl", hash = "sha256:986dbfb38b1140e763e413e6feb44cd731faf72d1909543178aa79b0e258265d"}, + {file = "identify-2.5.24.tar.gz", hash = "sha256:0aac67d5b4812498056d28a9a512a483f5085cc28640b02b258a59dac34301d4"}, ] [package.extras] @@ -1562,14 +1570,14 @@ files = [ [[package]] name = "igv-notebook" -version = "0.5.0" +version = "0.5.2" description = "Package for embedding the igv.js genome visualization in IPython notebooks" category = "main" optional = false python-versions = "*" files = [ - {file = "igv-notebook-0.5.0.tar.gz", hash = "sha256:9d29be5d1726514e8d984097b3be2346f5e4e803ca82ab229bec72ec76e64948"}, - {file = 
"igv_notebook-0.5.0-py3-none-any.whl", hash = "sha256:b78a0b83f18d0f3ac5882ec620b2c3ce95d4b20a1b9ee9b380739deeb775d4a7"}, + {file = "igv-notebook-0.5.2.tar.gz", hash = "sha256:8b47a1a6c41f11359a07264815401cc4000c99722c77cbb749182bf6b66cf69c"}, + {file = "igv_notebook-0.5.2-py3-none-any.whl", hash = "sha256:9f52a63cf4801cd8829760c8a85a28ccd0d480d6e62da5897f42887787be44bc"}, ] [package.dependencies] @@ -1647,14 +1655,14 @@ requests = "*" [[package]] name = "ipykernel" -version = "6.22.0" +version = "6.23.1" description = "IPython Kernel for Jupyter" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "ipykernel-6.22.0-py3-none-any.whl", hash = "sha256:1ae6047c1277508933078163721bbb479c3e7292778a04b4bacf0874550977d6"}, - {file = "ipykernel-6.22.0.tar.gz", hash = "sha256:302558b81f1bc22dc259fb2a0c5c7cf2f4c0bdb21b50484348f7bafe7fb71421"}, + {file = "ipykernel-6.23.1-py3-none-any.whl", hash = "sha256:77aeffab056c21d16f1edccdc9e5ccbf7d96eb401bd6703610a21be8b068aadc"}, + {file = "ipykernel-6.23.1.tar.gz", hash = "sha256:1aba0ae8453e15e9bc6b24e497ef6840114afcdb832ae597f32137fa19d42a6f"}, ] [package.dependencies] @@ -1699,14 +1707,14 @@ xyzservices = ">=2021.8.1" [[package]] name = "ipython" -version = "8.12.0" +version = "8.12.2" description = "IPython: Productive Interactive Computing" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "ipython-8.12.0-py3-none-any.whl", hash = "sha256:1c183bf61b148b00bcebfa5d9b39312733ae97f6dad90d7e9b4d86c8647f498c"}, - {file = "ipython-8.12.0.tar.gz", hash = "sha256:a950236df04ad75b5bc7f816f9af3d74dc118fd42f2ff7e80e8e60ca1f182e2d"}, + {file = "ipython-8.12.2-py3-none-any.whl", hash = "sha256:ea8801f15dfe4ffb76dea1b09b847430ffd70d827b41735c64a0638a04103bfc"}, + {file = "ipython-8.12.2.tar.gz", hash = "sha256:c7b80eb7f5a855a88efc971fda506ff7a91c280b42cdae26643e0f601ea281ea"}, ] [package.dependencies] @@ -1838,14 +1846,14 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "json5" -version = "0.9.11" +version = "0.9.14" description = "A Python implementation of the JSON5 data format." 
category = "dev" optional = false python-versions = "*" files = [ - {file = "json5-0.9.11-py2.py3-none-any.whl", hash = "sha256:1aa54b80b5e507dfe31d12b7743a642e2ffa6f70bf73b8e3d7d1d5fba83d99bd"}, - {file = "json5-0.9.11.tar.gz", hash = "sha256:4f1e196acc55b83985a51318489f345963c7ba84aa37607e49073066c562e99b"}, + {file = "json5-0.9.14-py2.py3-none-any.whl", hash = "sha256:740c7f1b9e584a468dbb2939d8d458db3427f2c93ae2139d05f47e453eae964f"}, + {file = "json5-0.9.14.tar.gz", hash = "sha256:9ed66c3a6ca3510a976a9ef9b8c0787de24802724ab1860bc0153c7fdd589b02"}, ] [package.extras] @@ -1895,14 +1903,14 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- [[package]] name = "jupyter-client" -version = "8.1.0" +version = "8.2.0" description = "Jupyter protocol implementation and client libraries" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "jupyter_client-8.1.0-py3-none-any.whl", hash = "sha256:d5b8e739d7816944be50f81121a109788a3d92732ecf1ad1e4dadebc948818fe"}, - {file = "jupyter_client-8.1.0.tar.gz", hash = "sha256:3fbab64100a0dcac7701b1e0f1a4412f1ccb45546ff2ad9bc4fcbe4e19804811"}, + {file = "jupyter_client-8.2.0-py3-none-any.whl", hash = "sha256:b18219aa695d39e2ad570533e0d71fb7881d35a873051054a84ee2a17c4b7389"}, + {file = "jupyter_client-8.2.0.tar.gz", hash = "sha256:9fe233834edd0e6c0aa5f05ca2ab4bdea1842bfd2d8a932878212fc5301ddaf0"}, ] [package.dependencies] @@ -1915,7 +1923,7 @@ traitlets = ">=5.3" [package.extras] docs = ["ipykernel", "myst-parser", "pydata-sphinx-theme", "sphinx (>=4)", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] -test = ["codecov", "coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pytest", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] +test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pytest", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] [[package]] name = "jupyter-core" @@ -1988,6 +1996,22 @@ cli = ["click", "rich"] docs = ["jupyterlite-sphinx", "myst-parser", "pydata-sphinx-theme", "sphinxcontrib-spelling"] test = ["click", "coverage", "pre-commit", "pytest (>=7.0)", "pytest-asyncio (>=0.19.0)", "pytest-console-scripts", "pytest-cov", "rich"] +[[package]] +name = "jupyter-lsp" +version = "2.1.0" +description = "Multi-Language Server WebSocket proxy for Jupyter Notebook/Lab server" +category = "dev" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jupyter-lsp-2.1.0.tar.gz", hash = "sha256:3aa2cbd81d3446256c34e3647d71b8f50617d07862a1c60fbe123f901cdb0dd2"}, + {file = "jupyter_lsp-2.1.0-py3-none-any.whl", hash = "sha256:d7c058cfe8bd7a76859734f3a142edb50a2d1e265a7e323c2fdcd8b1db80a91b"}, +] + +[package.dependencies] +importlib-metadata = {version = ">=4.8.3", markers = "python_version < \"3.10\""} +jupyter-server = ">=1.1.2" + [[package]] name = "jupyter-server" version = "2.5.0" @@ -2024,26 +2048,6 @@ websocket-client = "*" docs = ["docutils (<0.20)", "ipykernel", "jinja2", "jupyter-client", "jupyter-server", "mistune (<1.0.0)", "myst-parser", "nbformat", "prometheus-client", "pydata-sphinx-theme", "send2trash", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-openapi", "sphinxcontrib-spelling", "sphinxemoji", "tornado", "typing-extensions"] test = ["ipykernel", "pre-commit", "pytest (>=7.0)", "pytest-console-scripts", "pytest-jupyter[server] (>=0.4)", "pytest-timeout", "requests"] -[[package]] -name = "jupyter-server-fileid" -version = "0.8.0" 
-description = "" -category = "dev" -optional = false -python-versions = ">=3.7" -files = [ - {file = "jupyter_server_fileid-0.8.0-py3-none-any.whl", hash = "sha256:6092ef114eddccf6cba69c0f0feb612c2f476f2e9467828809edb854c18806bb"}, - {file = "jupyter_server_fileid-0.8.0.tar.gz", hash = "sha256:1e0816d0857f490fadea11348570f0cba03f70f315c9842225aecfa45882b6af"}, -] - -[package.dependencies] -jupyter-events = ">=0.5.0" -jupyter-server = ">=1.15,<3" - -[package.extras] -cli = ["click"] -test = ["jupyter-server[test] (>=1.15,<3)", "pytest", "pytest-cov"] - [[package]] name = "jupyter-server-terminals" version = "0.4.4" @@ -2064,74 +2068,39 @@ terminado = ">=0.8.3" docs = ["jinja2", "jupyter-server", "mistune (<3.0)", "myst-parser", "nbformat", "packaging", "pydata-sphinx-theme", "sphinxcontrib-github-alt", "sphinxcontrib-openapi", "sphinxcontrib-spelling", "sphinxemoji", "tornado"] test = ["coverage", "jupyter-server (>=2.0.0)", "pytest (>=7.0)", "pytest-cov", "pytest-jupyter[server] (>=0.5.3)", "pytest-timeout"] -[[package]] -name = "jupyter-server-ydoc" -version = "0.8.0" -description = "A Jupyter Server Extension Providing Y Documents." -category = "dev" -optional = false -python-versions = ">=3.7" -files = [ - {file = "jupyter_server_ydoc-0.8.0-py3-none-any.whl", hash = "sha256:969a3a1a77ed4e99487d60a74048dc9fa7d3b0dcd32e60885d835bbf7ba7be11"}, - {file = "jupyter_server_ydoc-0.8.0.tar.gz", hash = "sha256:a6fe125091792d16c962cc3720c950c2b87fcc8c3ecf0c54c84e9a20b814526c"}, -] - -[package.dependencies] -jupyter-server-fileid = ">=0.6.0,<1" -jupyter-ydoc = ">=0.2.0,<0.4.0" -ypy-websocket = ">=0.8.2,<0.9.0" - -[package.extras] -test = ["coverage", "jupyter-server[test] (>=2.0.0a0)", "pytest (>=7.0)", "pytest-cov", "pytest-timeout", "pytest-tornasync"] - -[[package]] -name = "jupyter-ydoc" -version = "0.2.3" -description = "Document structures for collaborative editing using Ypy" -category = "dev" -optional = false -python-versions = ">=3.7" -files = [ - {file = "jupyter_ydoc-0.2.3-py3-none-any.whl", hash = "sha256:3ac51abfe378c6aeb62a449e8f0241bede1205f0199b0d27429140cbba950f79"}, - {file = "jupyter_ydoc-0.2.3.tar.gz", hash = "sha256:98db7785215873c64d7dfcb1b741f41df11994c4b3d7e2957e004b392d6f11ea"}, -] - -[package.dependencies] -importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""} -y-py = ">=0.5.3,<0.6.0" - -[package.extras] -dev = ["click", "jupyter-releaser"] -test = ["pre-commit", "pytest", "pytest-asyncio", "websockets (>=10.0)", "ypy-websocket (>=0.3.1,<0.4.0)"] - [[package]] name = "jupyterlab" -version = "3.6.3" +version = "4.0.0" description = "JupyterLab computational environment" category = "dev" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "jupyterlab-3.6.3-py3-none-any.whl", hash = "sha256:6aba0caa771697d02fbf409f9767b2fdb4ee32ce935940e3b9a0d5d48d994d0f"}, - {file = "jupyterlab-3.6.3.tar.gz", hash = "sha256:373e9cfb8a72edd294be14f16662563a220cecf0fb26de7aab1af9a29b689b82"}, + {file = "jupyterlab-4.0.0-py3-none-any.whl", hash = "sha256:e2f67c189f833963c271a89df6bfa3eec4d5c8f7827ad3059538c5f467de193b"}, + {file = "jupyterlab-4.0.0.tar.gz", hash = "sha256:ce656d04828b2e4ee0758e22c862cc99aedec66a10319d09f0fd5ea51be68dd8"}, ] [package.dependencies] -ipython = "*" -jinja2 = ">=2.1" +async-lru = ">=1.0.0" +importlib-metadata = {version = ">=4.8.3", markers = "python_version < \"3.10\""} +importlib-resources = {version = ">=1.4", markers = "python_version < \"3.9\""} +ipykernel = "*" +jinja2 = ">=3.0.3" jupyter-core = 
"*" -jupyter-server = ">=1.16.0,<3" -jupyter-server-ydoc = ">=0.8.0,<0.9.0" -jupyter-ydoc = ">=0.2.3,<0.3.0" -jupyterlab-server = ">=2.19,<3.0" -nbclassic = "*" -notebook = "<7" +jupyter-lsp = ">=2.0.0" +jupyter-server = ">=2.4.0,<3" +jupyterlab-server = ">=2.19.0,<3" +notebook-shim = ">=0.2" packaging = "*" tomli = {version = "*", markers = "python_version < \"3.11\""} -tornado = ">=6.1.0" +tornado = ">=6.2.0" +traitlets = "*" [package.extras] -test = ["check-manifest", "coverage", "jupyterlab-server[test]", "pre-commit", "pytest (>=6.0)", "pytest-check-links (>=0.5)", "pytest-console-scripts", "pytest-cov", "pytest-jupyter (>=0.5.3)", "requests", "requests-cache", "virtualenv"] +dev = ["black[jupyter] (==23.3.0)", "build", "bump2version", "coverage", "hatch", "pre-commit", "pytest-cov", "ruff (==0.0.263)"] +docs = ["jsx-lexer", "myst-parser", "pydata-sphinx-theme (>=0.13.0)", "pytest", "pytest-check-links", "pytest-tornasync", "sphinx (>=1.8)", "sphinx-copybutton"] +docs-screenshots = ["altair (==4.2.2)", "ipython (==8.13.1)", "ipywidgets (==8.0.6)", "jupyterlab-geojson (==3.3.1)", "jupyterlab-language-pack-zh-cn (==3.6.post1)", "matplotlib (==3.7.1)", "nbconvert (>=7.0.0)", "pandas (==2.0.1)", "scipy (==1.10.1)", "vega-datasets (==0.9.0)"] +test = ["coverage", "pytest (>=7.0)", "pytest-check-links (>=0.7)", "pytest-console-scripts", "pytest-cov", "pytest-jupyter (>=0.5.3)", "pytest-timeout", "pytest-tornasync", "requests", "requests-cache", "virtualenv"] [[package]] name = "jupyterlab-pygments" @@ -2147,14 +2116,14 @@ files = [ [[package]] name = "jupyterlab-server" -version = "2.22.0" +version = "2.22.1" description = "A set of server components for JupyterLab and JupyterLab like applications." category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "jupyterlab_server-2.22.0-py3-none-any.whl", hash = "sha256:f4a7263ada89958854631a64bed45285caeac482925233159709f643c5871490"}, - {file = "jupyterlab_server-2.22.0.tar.gz", hash = "sha256:0f9f6752b0c534a7b22a6542b984fa6a2c18ab4d4e0a4c79f191138506a9a75f"}, + {file = "jupyterlab_server-2.22.1-py3-none-any.whl", hash = "sha256:1c8eb55c7cd70a50a51fef42a7b4e26ef2f7fc48728f0290604bd89b1dd156e6"}, + {file = "jupyterlab_server-2.22.1.tar.gz", hash = "sha256:dfaaf898af84b9d01ae9583b813f378b96ee90c3a66f24c5186ea5d1bbdb2089"}, ] [package.dependencies] @@ -2170,7 +2139,7 @@ requests = ">=2.28" [package.extras] docs = ["autodoc-traits", "docutils (<0.20)", "jinja2 (<3.2.0)", "mistune (<3)", "myst-parser", "pydata-sphinx-theme", "sphinx", "sphinx-copybutton", "sphinxcontrib-openapi"] openapi = ["openapi-core (>=0.16.1,<0.17.0)", "ruamel-yaml"] -test = ["codecov", "hatch", "ipykernel", "jupyterlab-server[openapi]", "openapi-spec-validator (>=0.5.1,<0.6.0)", "pytest (>=7.0)", "pytest-console-scripts", "pytest-cov", "pytest-jupyter[server] (>=0.6.2)", "pytest-timeout", "requests-mock", "sphinxcontrib-spelling", "strict-rfc3339", "werkzeug"] +test = ["hatch", "ipykernel", "jupyterlab-server[openapi]", "openapi-spec-validator (>=0.5.1,<0.6.0)", "pytest (>=7.0)", "pytest-console-scripts", "pytest-cov", "pytest-jupyter[server] (>=0.6.2)", "pytest-timeout", "requests-mock", "sphinxcontrib-spelling", "strict-rfc3339", "werkzeug"] [[package]] name = "jupyterlab-widgets" @@ -2186,40 +2155,36 @@ files = [ [[package]] name = "llvmlite" -version = "0.39.1" +version = "0.40.0" description = "lightweight wrapper around basic LLVM functionality" category = "main" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - 
{file = "llvmlite-0.39.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6717c7a6e93c9d2c3d07c07113ec80ae24af45cde536b34363d4bcd9188091d9"}, - {file = "llvmlite-0.39.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ddab526c5a2c4ccb8c9ec4821fcea7606933dc53f510e2a6eebb45a418d3488a"}, - {file = "llvmlite-0.39.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3f331a323d0f0ada6b10d60182ef06c20a2f01be21699999d204c5750ffd0b4"}, - {file = "llvmlite-0.39.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2c00ff204afa721b0bb9835b5bf1ba7fba210eefcec5552a9e05a63219ba0dc"}, - {file = "llvmlite-0.39.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16f56eb1eec3cda3a5c526bc3f63594fc24e0c8d219375afeb336f289764c6c7"}, - {file = "llvmlite-0.39.1-cp310-cp310-win32.whl", hash = "sha256:d0bfd18c324549c0fec2c5dc610fd024689de6f27c6cc67e4e24a07541d6e49b"}, - {file = "llvmlite-0.39.1-cp310-cp310-win_amd64.whl", hash = "sha256:7ebf1eb9badc2a397d4f6a6c8717447c81ac011db00064a00408bc83c923c0e4"}, - {file = "llvmlite-0.39.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6546bed4e02a1c3d53a22a0bced254b3b6894693318b16c16c8e43e29d6befb6"}, - {file = "llvmlite-0.39.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1578f5000fdce513712e99543c50e93758a954297575610f48cb1fd71b27c08a"}, - {file = "llvmlite-0.39.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3803f11ad5f6f6c3d2b545a303d68d9fabb1d50e06a8d6418e6fcd2d0df00959"}, - {file = "llvmlite-0.39.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50aea09a2b933dab7c9df92361b1844ad3145bfb8dd2deb9cd8b8917d59306fb"}, - {file = "llvmlite-0.39.1-cp37-cp37m-win32.whl", hash = "sha256:b1a0bbdb274fb683f993198775b957d29a6f07b45d184c571ef2a721ce4388cf"}, - {file = "llvmlite-0.39.1-cp37-cp37m-win_amd64.whl", hash = "sha256:e172c73fccf7d6db4bd6f7de963dedded900d1a5c6778733241d878ba613980e"}, - {file = "llvmlite-0.39.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e31f4b799d530255aaf0566e3da2df5bfc35d3cd9d6d5a3dcc251663656c27b1"}, - {file = "llvmlite-0.39.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:62c0ea22e0b9dffb020601bb65cb11dd967a095a488be73f07d8867f4e327ca5"}, - {file = "llvmlite-0.39.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ffc84ade195abd4abcf0bd3b827b9140ae9ef90999429b9ea84d5df69c9058c"}, - {file = "llvmlite-0.39.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c0f158e4708dda6367d21cf15afc58de4ebce979c7a1aa2f6b977aae737e2a54"}, - {file = "llvmlite-0.39.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22d36591cd5d02038912321d9ab8e4668e53ae2211da5523f454e992b5e13c36"}, - {file = "llvmlite-0.39.1-cp38-cp38-win32.whl", hash = "sha256:4c6ebace910410daf0bebda09c1859504fc2f33d122e9a971c4c349c89cca630"}, - {file = "llvmlite-0.39.1-cp38-cp38-win_amd64.whl", hash = "sha256:fb62fc7016b592435d3e3a8f680e3ea8897c3c9e62e6e6cc58011e7a4801439e"}, - {file = "llvmlite-0.39.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa9b26939ae553bf30a9f5c4c754db0fb2d2677327f2511e674aa2f5df941789"}, - {file = "llvmlite-0.39.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e4f212c018db951da3e1dc25c2651abc688221934739721f2dad5ff1dd5f90e7"}, - {file = "llvmlite-0.39.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39dc2160aed36e989610fc403487f11b8764b6650017ff367e45384dff88ffbf"}, - {file = 
"llvmlite-0.39.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1ec3d70b3e507515936e475d9811305f52d049281eaa6c8273448a61c9b5b7e2"}, - {file = "llvmlite-0.39.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60f8dd1e76f47b3dbdee4b38d9189f3e020d22a173c00f930b52131001d801f9"}, - {file = "llvmlite-0.39.1-cp39-cp39-win32.whl", hash = "sha256:03aee0ccd81735696474dc4f8b6be60774892a2929d6c05d093d17392c237f32"}, - {file = "llvmlite-0.39.1-cp39-cp39-win_amd64.whl", hash = "sha256:3fc14e757bc07a919221f0cbaacb512704ce5774d7fcada793f1996d6bc75f2a"}, - {file = "llvmlite-0.39.1.tar.gz", hash = "sha256:b43abd7c82e805261c425d50335be9a6c4f84264e34d6d6e475207300005d572"}, + {file = "llvmlite-0.40.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90a46db1ed219d93ef05245ec17cf243074ec2b2687209cb310a803a2c2510dc"}, + {file = "llvmlite-0.40.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b9d742b66023298532d0e7beddd3d9f04334c046df7a02a1ec2ba8b4046a978c"}, + {file = "llvmlite-0.40.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ff38c309dc758b996d556e599e00647e6b8dbd21125c06b2d0584a9984a2288"}, + {file = "llvmlite-0.40.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66ecb8cdee35bbbdad9b331f446641977645de1973f6270bf4194307a1753666"}, + {file = "llvmlite-0.40.0-cp310-cp310-win32.whl", hash = "sha256:83dd5148f6ddd4d35585b69ebaa50605fdf8011a5b7259a0463afd4aefc62414"}, + {file = "llvmlite-0.40.0-cp310-cp310-win_amd64.whl", hash = "sha256:f72d6ccbfd9cc7da43098fcef23ffbe173ce2d986215072dbb2e7929412f9ff8"}, + {file = "llvmlite-0.40.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bbf19077144e159406ef222348d5330d5061177fb79d3f7f82abf2cf29b77c0b"}, + {file = "llvmlite-0.40.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a4732d6c981f658f014dd2ab2b682ac631cd12a6695e77c2d460cc68dc767868"}, + {file = "llvmlite-0.40.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2754c4d2b6f027ab45425abd94dee4cbd228b598531b1e9e1fc15f3298265d88"}, + {file = "llvmlite-0.40.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb79b992bdc2e62c5f5f86263d5546b5298d498e7c1a9d64b3a6f0d31f46ba5b"}, + {file = "llvmlite-0.40.0-cp311-cp311-win_amd64.whl", hash = "sha256:be0ff5b68a86e47a7ec6cd5389bb17b4b8f020b981628c9e714dc2cfdbe89c86"}, + {file = "llvmlite-0.40.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f5d4445eccd9c9c5639b35cb6279231f97cbd77a1c49fb41c05081ff96e041db"}, + {file = "llvmlite-0.40.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:060f00611d8e65d6db80cabba17fbefde9ebefbfb6937fe5677f06cd3e7bbe3c"}, + {file = "llvmlite-0.40.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58f5ba5febb2372418a3d37bd76d51bb987276a6fd979c2f2772b60b9061e575"}, + {file = "llvmlite-0.40.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d1622237e6ce543ac185751f782c7e10cabe45abf2de802cd5dca8023805a5c"}, + {file = "llvmlite-0.40.0-cp38-cp38-win32.whl", hash = "sha256:06803a1a38f911576bbe63a4082334d6661c59f2080e4681de1c66ec4924b0ac"}, + {file = "llvmlite-0.40.0-cp38-cp38-win_amd64.whl", hash = "sha256:87c2114567f95c715ae35b03d82caa0df66a978c93a1ff752964949e9ce596d5"}, + {file = "llvmlite-0.40.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a3382d81fcda57f5502f45a9ca62e0c9103fabd5f817c9820c7e61b9375f3d7"}, + {file = "llvmlite-0.40.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:260b0241c17a1ec585020e1df58ed30b9975c3573c619fa1724ceb4cd53cbe42"}, + {file = "llvmlite-0.40.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f980992b6c9dfee20a1608c5a4d875f8a52d76353ca02470550a85be6e5d3680"}, + {file = "llvmlite-0.40.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52eee9e245ef6eb911d6c2a3a1a66378745a40c637284386031b0915754f457e"}, + {file = "llvmlite-0.40.0-cp39-cp39-win32.whl", hash = "sha256:d27c2ec699b820296659dfd36ead1c527eb190c6d5cb3de24bfbee1024bdc20a"}, + {file = "llvmlite-0.40.0-cp39-cp39-win_amd64.whl", hash = "sha256:6cf84141d1793c69285b88acf4216370cb831eab99778546a2a9002fadac932d"}, + {file = "llvmlite-0.40.0.tar.gz", hash = "sha256:c910b8fbfd67b8e9d0b10ebc012b23cd67cbecef1b96f00d391ddd298d71671c"}, ] [[package]] @@ -2419,38 +2384,38 @@ files = [ [[package]] name = "mypy" -version = "1.1.1" +version = "1.3.0" description = "Optional static typing for Python" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "mypy-1.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39c7119335be05630611ee798cc982623b9e8f0cff04a0b48dfc26100e0b97af"}, - {file = "mypy-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:61bf08362e93b6b12fad3eab68c4ea903a077b87c90ac06c11e3d7a09b56b9c1"}, - {file = "mypy-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbb19c9f662e41e474e0cff502b7064a7edc6764f5262b6cd91d698163196799"}, - {file = "mypy-1.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:315ac73cc1cce4771c27d426b7ea558fb4e2836f89cb0296cbe056894e3a1f78"}, - {file = "mypy-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:5cb14ff9919b7df3538590fc4d4c49a0f84392237cbf5f7a816b4161c061829e"}, - {file = "mypy-1.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:26cdd6a22b9b40b2fd71881a8a4f34b4d7914c679f154f43385ca878a8297389"}, - {file = "mypy-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5b5f81b40d94c785f288948c16e1f2da37203c6006546c5d947aab6f90aefef2"}, - {file = "mypy-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21b437be1c02712a605591e1ed1d858aba681757a1e55fe678a15c2244cd68a5"}, - {file = "mypy-1.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d809f88734f44a0d44959d795b1e6f64b2bbe0ea4d9cc4776aa588bb4229fc1c"}, - {file = "mypy-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:a380c041db500e1410bb5b16b3c1c35e61e773a5c3517926b81dfdab7582be54"}, - {file = "mypy-1.1.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b7c7b708fe9a871a96626d61912e3f4ddd365bf7f39128362bc50cbd74a634d5"}, - {file = "mypy-1.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1c10fa12df1232c936830839e2e935d090fc9ee315744ac33b8a32216b93707"}, - {file = "mypy-1.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0a28a76785bf57655a8ea5eb0540a15b0e781c807b5aa798bd463779988fa1d5"}, - {file = "mypy-1.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:ef6a01e563ec6a4940784c574d33f6ac1943864634517984471642908b30b6f7"}, - {file = "mypy-1.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d64c28e03ce40d5303450f547e07418c64c241669ab20610f273c9e6290b4b0b"}, - {file = "mypy-1.1.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:64cc3afb3e9e71a79d06e3ed24bb508a6d66f782aff7e56f628bf35ba2e0ba51"}, - {file = "mypy-1.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce61663faf7a8e5ec6f456857bfbcec2901fbdb3ad958b778403f63b9e606a1b"}, - {file = 
"mypy-1.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2b0c373d071593deefbcdd87ec8db91ea13bd8f1328d44947e88beae21e8d5e9"}, - {file = "mypy-1.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:2888ce4fe5aae5a673386fa232473014056967f3904f5abfcf6367b5af1f612a"}, - {file = "mypy-1.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:19ba15f9627a5723e522d007fe708007bae52b93faab00f95d72f03e1afa9598"}, - {file = "mypy-1.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:59bbd71e5c58eed2e992ce6523180e03c221dcd92b52f0e792f291d67b15a71c"}, - {file = "mypy-1.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9401e33814cec6aec8c03a9548e9385e0e228fc1b8b0a37b9ea21038e64cdd8a"}, - {file = "mypy-1.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4b398d8b1f4fba0e3c6463e02f8ad3346f71956b92287af22c9b12c3ec965a9f"}, - {file = "mypy-1.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:69b35d1dcb5707382810765ed34da9db47e7f95b3528334a3c999b0c90fe523f"}, - {file = "mypy-1.1.1-py3-none-any.whl", hash = "sha256:4e4e8b362cdf99ba00c2b218036002bdcdf1e0de085cdb296a49df03fb31dfc4"}, - {file = "mypy-1.1.1.tar.gz", hash = "sha256:ae9ceae0f5b9059f33dbc62dea087e942c0ccab4b7a003719cb70f9b8abfa32f"}, + {file = "mypy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eb485cea53f4f5284e5baf92902cd0088b24984f4209e25981cc359d64448d"}, + {file = "mypy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c99c3ecf223cf2952638da9cd82793d8f3c0c5fa8b6ae2b2d9ed1e1ff51ba85"}, + {file = "mypy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:550a8b3a19bb6589679a7c3c31f64312e7ff482a816c96e0cecec9ad3a7564dd"}, + {file = "mypy-1.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cbc07246253b9e3d7d74c9ff948cd0fd7a71afcc2b77c7f0a59c26e9395cb152"}, + {file = "mypy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:a22435632710a4fcf8acf86cbd0d69f68ac389a3892cb23fbad176d1cddaf228"}, + {file = "mypy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6e33bb8b2613614a33dff70565f4c803f889ebd2f859466e42b46e1df76018dd"}, + {file = "mypy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7d23370d2a6b7a71dc65d1266f9a34e4cde9e8e21511322415db4b26f46f6b8c"}, + {file = "mypy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:658fe7b674769a0770d4b26cb4d6f005e88a442fe82446f020be8e5f5efb2fae"}, + {file = "mypy-1.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e42d29e324cdda61daaec2336c42512e59c7c375340bd202efa1fe0f7b8f8ca"}, + {file = "mypy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:d0b6c62206e04061e27009481cb0ec966f7d6172b5b936f3ead3d74f29fe3dcf"}, + {file = "mypy-1.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:76ec771e2342f1b558c36d49900dfe81d140361dd0d2df6cd71b3db1be155409"}, + {file = "mypy-1.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebc95f8386314272bbc817026f8ce8f4f0d2ef7ae44f947c4664efac9adec929"}, + {file = "mypy-1.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:faff86aa10c1aa4a10e1a301de160f3d8fc8703b88c7e98de46b531ff1276a9a"}, + {file = "mypy-1.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8c5979d0deb27e0f4479bee18ea0f83732a893e81b78e62e2dda3e7e518c92ee"}, + {file = "mypy-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c5d2cc54175bab47011b09688b418db71403aefad07cbcd62d44010543fc143f"}, + {file = "mypy-1.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:87df44954c31d86df96c8bd6e80dfcd773473e877ac6176a8e29898bfb3501cb"}, + {file = 
"mypy-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:473117e310febe632ddf10e745a355714e771ffe534f06db40702775056614c4"}, + {file = "mypy-1.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:74bc9b6e0e79808bf8678d7678b2ae3736ea72d56eede3820bd3849823e7f305"}, + {file = "mypy-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:44797d031a41516fcf5cbfa652265bb994e53e51994c1bd649ffcd0c3a7eccbf"}, + {file = "mypy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ddae0f39ca146972ff6bb4399f3b2943884a774b8771ea0a8f50e971f5ea5ba8"}, + {file = "mypy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1c4c42c60a8103ead4c1c060ac3cdd3ff01e18fddce6f1016e08939647a0e703"}, + {file = "mypy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e86c2c6852f62f8f2b24cb7a613ebe8e0c7dc1402c61d36a609174f63e0ff017"}, + {file = "mypy-1.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f9dca1e257d4cc129517779226753dbefb4f2266c4eaad610fc15c6a7e14283e"}, + {file = "mypy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:95d8d31a7713510685b05fbb18d6ac287a56c8f6554d88c19e73f724a445448a"}, + {file = "mypy-1.3.0-py3-none-any.whl", hash = "sha256:a8763e72d5d9574d45ce5881962bc8e9046bf7b375b0abf031f3e6811732a897"}, + {file = "mypy-1.3.0.tar.gz", hash = "sha256:e1f4d16e296f5135624b34e8fb741eb0eadedca90862405b1f1fde2040b9bd11"}, ] [package.dependencies] @@ -2478,14 +2443,14 @@ files = [ [[package]] name = "nbclassic" -version = "0.5.4" +version = "1.0.0" description = "Jupyter Notebook as a Jupyter Server extension." category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "nbclassic-0.5.4-py3-none-any.whl", hash = "sha256:df78e4ba143f35084ad060deec16cb1d4839dde47bfdbc4232beb7071a6d70ea"}, - {file = "nbclassic-0.5.4.tar.gz", hash = "sha256:312b3f7d7ff2e6c261d51799bb12e6493498ab47d3469d3a01015d5533fd4d2b"}, + {file = "nbclassic-1.0.0-py3-none-any.whl", hash = "sha256:f99e4769b4750076cd4235c044b61232110733322384a94a63791d2e7beacc66"}, + {file = "nbclassic-1.0.0.tar.gz", hash = "sha256:0ae11eb2319455d805596bf320336cda9554b41d99ab9a3c31bf8180bffa30e3"}, ] [package.dependencies] @@ -2499,7 +2464,7 @@ jupyter-server = ">=1.8" nbconvert = ">=5" nbformat = "*" nest-asyncio = ">=1.5" -notebook-shim = ">=0.1.0" +notebook-shim = ">=0.2.3" prometheus-client = "*" pyzmq = ">=17" Send2Trash = ">=1.8.0" @@ -2514,14 +2479,14 @@ test = ["coverage", "nbval", "pytest", "pytest-cov", "pytest-jupyter", "pytest-p [[package]] name = "nbclient" -version = "0.7.3" +version = "0.7.4" description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor." 
category = "dev" optional = false python-versions = ">=3.7.0" files = [ - {file = "nbclient-0.7.3-py3-none-any.whl", hash = "sha256:8fa96f7e36693d5e83408f5e840f113c14a45c279befe609904dbe05dad646d1"}, - {file = "nbclient-0.7.3.tar.gz", hash = "sha256:26e41c6dca4d76701988bc34f64e1bfc2413ae6d368f13d7b5ac407efb08c755"}, + {file = "nbclient-0.7.4-py3-none-any.whl", hash = "sha256:c817c0768c5ff0d60e468e017613e6eae27b6fa31e43f905addd2d24df60c125"}, + {file = "nbclient-0.7.4.tar.gz", hash = "sha256:d447f0e5a4cfe79d462459aec1b3dc5c2e9152597262be8ee27f7d4c02566a0d"}, ] [package.dependencies] @@ -2537,14 +2502,14 @@ test = ["flaky", "ipykernel", "ipython", "ipywidgets", "nbconvert (>=7.0.0)", "p [[package]] name = "nbconvert" -version = "7.3.0" +version = "7.4.0" description = "Converting Jupyter Notebooks" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "nbconvert-7.3.0-py3-none-any.whl", hash = "sha256:8983a83d0b083d56b076019f0a319f63bc16af70c9372892b86a0aab0a264b1d"}, - {file = "nbconvert-7.3.0.tar.gz", hash = "sha256:b970a13aba97529c223d805dd0706c2fe04dfc05e250ad4e6f7ae33daf6fede1"}, + {file = "nbconvert-7.4.0-py3-none-any.whl", hash = "sha256:af5064a9db524f9f12f4e8be7f0799524bd5b14c1adea37e34e83c95127cc818"}, + {file = "nbconvert-7.4.0.tar.gz", hash = "sha256:51b6c77b507b177b73f6729dba15676e42c4e92bcb00edc8cc982ee72e7d89d7"}, ] [package.dependencies] @@ -2610,14 +2575,14 @@ files = [ [[package]] name = "nodeenv" -version = "1.7.0" +version = "1.8.0" description = "Node.js virtual environment builder" category = "dev" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" files = [ - {file = "nodeenv-1.7.0-py2.py3-none-any.whl", hash = "sha256:27083a7b96a25f2f5e1d8cb4b6317ee8aeda3bdd121394e5ac54e498028a042e"}, - {file = "nodeenv-1.7.0.tar.gz", hash = "sha256:e0e7f7dfb85fc5394c6fe1e8fa98131a2473e04311a45afb6508f7cf1836fa2b"}, + {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"}, + {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"}, ] [package.dependencies] @@ -2625,14 +2590,14 @@ setuptools = "*" [[package]] name = "notebook" -version = "6.5.3" +version = "6.5.4" description = "A web-based notebook environment for interactive computing" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "notebook-6.5.3-py3-none-any.whl", hash = "sha256:50a334ad9d60b30cb759405168ef6fc3d60350ab5439fb1631544bb09dcb2cce"}, - {file = "notebook-6.5.3.tar.gz", hash = "sha256:b12bee3292211d85dd7e588a790ddce30cb3e8fbcfa1e803522a207f60819e05"}, + {file = "notebook-6.5.4-py3-none-any.whl", hash = "sha256:dd17e78aefe64c768737b32bf171c1c766666a21cc79a44d37a1700771cab56f"}, + {file = "notebook-6.5.4.tar.gz", hash = "sha256:517209568bd47261e2def27a140e97d49070602eea0d226a696f42a7f16c9a4e"}, ] [package.dependencies] @@ -2660,65 +2625,60 @@ test = ["coverage", "nbval", "pytest", "pytest-cov", "requests", "requests-unixs [[package]] name = "notebook-shim" -version = "0.2.2" +version = "0.2.3" description = "A shim layer for notebook traits and config" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "notebook_shim-0.2.2-py3-none-any.whl", hash = "sha256:9c6c30f74c4fbea6fce55c1be58e7fd0409b1c681b075dcedceb005db5026949"}, - {file = "notebook_shim-0.2.2.tar.gz", hash = "sha256:090e0baf9a5582ff59b607af523ca2db68ff216da0c69956b62cab2ef4fc9c3f"}, + {file = 
"notebook_shim-0.2.3-py3-none-any.whl", hash = "sha256:a83496a43341c1674b093bfcebf0fe8e74cbe7eda5fd2bbc56f8e39e1486c0c7"}, + {file = "notebook_shim-0.2.3.tar.gz", hash = "sha256:f69388ac283ae008cd506dda10d0288b09a017d822d5e8c7129a152cbd3ce7e9"}, ] [package.dependencies] jupyter-server = ">=1.8,<3" [package.extras] -test = ["pytest", "pytest-console-scripts", "pytest-tornasync"] +test = ["pytest", "pytest-console-scripts", "pytest-jupyter", "pytest-tornasync"] [[package]] name = "numba" -version = "0.56.4" +version = "0.57.0" description = "compiling Python code using LLVM" category = "main" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "numba-0.56.4-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:9f62672145f8669ec08762895fe85f4cf0ead08ce3164667f2b94b2f62ab23c3"}, - {file = "numba-0.56.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c602d015478b7958408d788ba00a50272649c5186ea8baa6cf71d4a1c761bba1"}, - {file = "numba-0.56.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:85dbaed7a05ff96492b69a8900c5ba605551afb9b27774f7f10511095451137c"}, - {file = "numba-0.56.4-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:f4cfc3a19d1e26448032049c79fc60331b104f694cf570a9e94f4e2c9d0932bb"}, - {file = "numba-0.56.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e08e203b163ace08bad500b0c16f6092b1eb34fd1fce4feaf31a67a3a5ecf3b"}, - {file = "numba-0.56.4-cp310-cp310-win32.whl", hash = "sha256:0611e6d3eebe4cb903f1a836ffdb2bda8d18482bcd0a0dcc56e79e2aa3fefef5"}, - {file = "numba-0.56.4-cp310-cp310-win_amd64.whl", hash = "sha256:fbfb45e7b297749029cb28694abf437a78695a100e7c2033983d69f0ba2698d4"}, - {file = "numba-0.56.4-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:3cb1a07a082a61df80a468f232e452d818f5ae254b40c26390054e4e868556e0"}, - {file = "numba-0.56.4-cp37-cp37m-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d69ad934e13c15684e7887100a8f5f0f61d7a8e57e0fd29d9993210089a5b531"}, - {file = "numba-0.56.4-cp37-cp37m-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:dbcc847bac2d225265d054993a7f910fda66e73d6662fe7156452cac0325b073"}, - {file = "numba-0.56.4-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8a95ca9cc77ea4571081f6594e08bd272b66060634b8324e99cd1843020364f9"}, - {file = "numba-0.56.4-cp37-cp37m-win32.whl", hash = "sha256:fcdf84ba3ed8124eb7234adfbb8792f311991cbf8aed1cad4b1b1a7ee08380c1"}, - {file = "numba-0.56.4-cp37-cp37m-win_amd64.whl", hash = "sha256:42f9e1be942b215df7e6cc9948cf9c15bb8170acc8286c063a9e57994ef82fd1"}, - {file = "numba-0.56.4-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:553da2ce74e8862e18a72a209ed3b6d2924403bdd0fb341fa891c6455545ba7c"}, - {file = "numba-0.56.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4373da9757049db7c90591e9ec55a2e97b2b36ba7ae3bf9c956a513374077470"}, - {file = "numba-0.56.4-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3a993349b90569518739009d8f4b523dfedd7e0049e6838c0e17435c3e70dcc4"}, - {file = "numba-0.56.4-cp38-cp38-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:720886b852a2d62619ae3900fe71f1852c62db4f287d0c275a60219e1643fc04"}, - {file = "numba-0.56.4-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e64d338b504c9394a4a34942df4627e1e6cb07396ee3b49fe7b8d6420aa5104f"}, - {file = "numba-0.56.4-cp38-cp38-win32.whl", hash = "sha256:03fe94cd31e96185cce2fae005334a8cc712fc2ba7756e52dff8c9400718173f"}, - 
{file = "numba-0.56.4-cp38-cp38-win_amd64.whl", hash = "sha256:91f021145a8081f881996818474ef737800bcc613ffb1e618a655725a0f9e246"}, - {file = "numba-0.56.4-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:d0ae9270a7a5cc0ede63cd234b4ff1ce166c7a749b91dbbf45e0000c56d3eade"}, - {file = "numba-0.56.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c75e8a5f810ce80a0cfad6e74ee94f9fde9b40c81312949bf356b7304ef20740"}, - {file = "numba-0.56.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a12ef323c0f2101529d455cfde7f4135eaa147bad17afe10b48634f796d96abd"}, - {file = "numba-0.56.4-cp39-cp39-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:03634579d10a6129181129de293dd6b5eaabee86881369d24d63f8fe352dd6cb"}, - {file = "numba-0.56.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0240f9026b015e336069329839208ebd70ec34ae5bfbf402e4fcc8e06197528e"}, - {file = "numba-0.56.4-cp39-cp39-win32.whl", hash = "sha256:14dbbabf6ffcd96ee2ac827389afa59a70ffa9f089576500434c34abf9b054a4"}, - {file = "numba-0.56.4-cp39-cp39-win_amd64.whl", hash = "sha256:0da583c532cd72feefd8e551435747e0e0fbb3c0530357e6845fcc11e38d6aea"}, - {file = "numba-0.56.4.tar.gz", hash = "sha256:32d9fef412c81483d7efe0ceb6cf4d3310fde8b624a9cecca00f790573ac96ee"}, + {file = "numba-0.57.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2e2c14c411545e80bf0f1a33232fb0bd6aa3368f86e56eeffc7f6d3ac16ea3fd"}, + {file = "numba-0.57.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6b3382c56d805ffcdc7b46eb69a906be733dd35b84be14abba8e5fd27d7916b2"}, + {file = "numba-0.57.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:133cba9b5002bf67f6f73d9b3050d919c1be91326bbdcccfdf3259bcfb1cec0e"}, + {file = "numba-0.57.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d92a17ee849574665c5d94e9c9b862e469e1231d3dbb9e58e58b30b4bb0cbce9"}, + {file = "numba-0.57.0-cp310-cp310-win32.whl", hash = "sha256:abc90c3d303a67ae5194770a6f0d0a83edf076683b8a426349a27b91d98e00d1"}, + {file = "numba-0.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:430f43c96f866ca4fe6008d8aa28bb608911d908ff94f879e0dbad7768ef9869"}, + {file = "numba-0.57.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:069f7d8fddad4c0eb1d7534c2a18098fe50473dc77832b409176002e9011b96f"}, + {file = "numba-0.57.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:79daa130fc9e4ebd1eea0a594d1de86d8a4366989f5fab93c482246b502520db"}, + {file = "numba-0.57.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:274f4db4814ebd5ec81697acfc36df04a865b86610d7714905185b753f3f9baf"}, + {file = "numba-0.57.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0106ee441e3f69cc6f17cb470c4fcccd592e0606567d43245635d72b071ab88e"}, + {file = "numba-0.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:a5d31b4d95000d86ffa9652ab5bcfa0ea30e6c3fc40e610147d4f2f00116703d"}, + {file = "numba-0.57.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3e0b8de39bf17519435937b53276dfb02e2eb8bc27cd211c8eeb01ffed1cab6b"}, + {file = "numba-0.57.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:18d90fa6fcd5b796999392a8ea67f2fbccecf8dabcea726e2e721c79f40566a6"}, + {file = "numba-0.57.0-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d4f62528c7c8c5f97e9689fd788e420b68c67ee0a1a9a7715a57fd584b7aef1e"}, + {file = "numba-0.57.0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:fd12cf0b431676c08057685e229ea5daaa1ec8efba2506c38671734ace49c2d7"}, + {file = "numba-0.57.0-cp38-cp38-win32.whl", hash = "sha256:e5f11b1d435fb4d1d1b68fa68ff456d632dc4bfd40b18825ff80d6081d1afb26"}, + {file = "numba-0.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:5810ed2d6d22eb3c48bedfac2187fc44bb90e05f02d47fd31059e69207ae4106"}, + {file = "numba-0.57.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eddba74493d4003a42cd61ff7feca4928a94a45553d1fddac77a5cc339f6f4f9"}, + {file = "numba-0.57.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:110be5e1213d0a3d5fc691e921a000779500620196d99cee9908fce83d1e48df"}, + {file = "numba-0.57.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f949018ab9c467d38f14fe17db4df0d4a1c664be802189e2d6c5a434d9ffd4f6"}, + {file = "numba-0.57.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9fc0cd4ec93a1e3877985e10ed5837ed2991c83aa4b7ca574caae5c8b448cc4b"}, + {file = "numba-0.57.0-cp39-cp39-win32.whl", hash = "sha256:83d4f21c98eed3001e9896a43a1ce9c825999c03f7eb39ddd1c2d07a76708392"}, + {file = "numba-0.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:9173d00c6753212b68e4fd319cfa96c21b2263949452c97b034e78ce09539dee"}, + {file = "numba-0.57.0.tar.gz", hash = "sha256:2af6d81067a5bdc13960c6d2519dbabbf4d5d597cf75d640c5aeaefd48c6420a"}, ] [package.dependencies] importlib-metadata = {version = "*", markers = "python_version < \"3.9\""} -llvmlite = ">=0.39.0dev0,<0.40" -numpy = ">=1.18,<1.24" -setuptools = "*" +llvmlite = ">=0.40.0dev0,<0.41" +numpy = ">=1.21,<1.25" [[package]] name = "numcodecs" @@ -2825,49 +2785,49 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] [[package]] name = "packaging" -version = "23.0" +version = "23.1" description = "Core utilities for Python packages" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"}, - {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"}, + {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, + {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, ] [[package]] name = "pandas" -version = "2.0.0" +version = "2.0.1" description = "Powerful data structures for data analysis, time series, and statistics" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "pandas-2.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bbb2c5e94d6aa4e632646a3bacd05c2a871c3aa3e85c9bec9be99cb1267279f2"}, - {file = "pandas-2.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b5337c87c4e963f97becb1217965b6b75c6fe5f54c4cf09b9a5ac52fc0bd03d3"}, - {file = "pandas-2.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ded51f7e3dd9b4f8b87f2ceb7bd1a8df2491f7ee72f7074c6927a512607199e"}, - {file = "pandas-2.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52c858de9e9fc422d25e67e1592a6e6135d7bcf9a19fcaf4d0831a0be496bf21"}, - {file = "pandas-2.0.0-cp310-cp310-win32.whl", hash = "sha256:2d1d138848dd71b37e3cbe7cd952ff84e2ab04d8988972166e18567dcc811245"}, - {file = "pandas-2.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:d08e41d96bc4de6f500afe80936c68fce6099d5a434e2af7c7fd8e7c72a3265d"}, - {file = "pandas-2.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:24472cfc7ced511ac90608728b88312be56edc8f19b9ed885a7d2e47ffaf69c0"}, - {file = "pandas-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4ffb14f50c74ee541610668137830bb93e9dfa319b1bef2cedf2814cd5ac9c70"}, - {file = "pandas-2.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c24c7d12d033a372a9daf9ff2c80f8b0af6f98d14664dbb0a4f6a029094928a7"}, - {file = "pandas-2.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8318de0f886e4dcb8f9f36e45a3d6a6c3d1cfdc508354da85e739090f0222991"}, - {file = "pandas-2.0.0-cp311-cp311-win32.whl", hash = "sha256:57c34b79c13249505e850d0377b722961b99140f81dafbe6f19ef10239f6284a"}, - {file = "pandas-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:8f987ec26e96a8490909bc5d98c514147236e49830cba7df8690f6087c12bbae"}, - {file = "pandas-2.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b3ba8f5dd470d8bfbc4259829589f4a32881151c49e36384d9eb982b35a12020"}, - {file = "pandas-2.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:fcd471c9d9f60926ab2f15c6c29164112f458acb42280365fbefa542d0c2fc74"}, - {file = "pandas-2.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9253edfd015520ce77a9343eb7097429479c039cd3ebe81d7810ea11b4b24695"}, - {file = "pandas-2.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:977326039bd1ded620001a1889e2ed4798460a6bc5a24fbaebb5f07a41c32a55"}, - {file = "pandas-2.0.0-cp38-cp38-win32.whl", hash = "sha256:78425ca12314b23356c28b16765639db10ebb7d8983f705d6759ff7fe41357fa"}, - {file = "pandas-2.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:d93b7fcfd9f3328072b250d6d001dcfeec5d3bb66c1b9c8941e109a46c0c01a8"}, - {file = "pandas-2.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:425705cee8be54db2504e8dd2a730684790b15e5904b750c367611ede49098ab"}, - {file = "pandas-2.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a4f789b7c012a608c08cda4ff0872fd979cb18907a37982abe884e6f529b8793"}, - {file = "pandas-2.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3bb9d840bf15656805f6a3d87eea9dcb7efdf1314a82adcf7f00b820427c5570"}, - {file = "pandas-2.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0778ab54c8f399d83d98ffb674d11ec716449956bc6f6821891ab835848687f2"}, - {file = "pandas-2.0.0-cp39-cp39-win32.whl", hash = "sha256:70db5c278bbec0306d32bf78751ff56b9594c05a5098386f6c8a563659124f91"}, - {file = "pandas-2.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:4f3320bb55f34af4193020158ef8118ee0fb9aec7cc47d2084dbfdd868a0a24f"}, - {file = "pandas-2.0.0.tar.gz", hash = "sha256:cda9789e61b44463c1c4fe17ef755de77bcd13b09ba31c940d20f193d63a5dc8"}, + {file = "pandas-2.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:70a996a1d2432dadedbb638fe7d921c88b0cc4dd90374eab51bb33dc6c0c2a12"}, + {file = "pandas-2.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:909a72b52175590debbf1d0c9e3e6bce2f1833c80c76d80bd1aa09188be768e5"}, + {file = "pandas-2.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe7914d8ddb2d54b900cec264c090b88d141a1eed605c9539a187dbc2547f022"}, + {file = "pandas-2.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a514ae436b23a92366fbad8365807fc0eed15ca219690b3445dcfa33597a5cc"}, + {file = "pandas-2.0.1-cp310-cp310-win32.whl", hash = "sha256:12bd6618e3cc737c5200ecabbbb5eaba8ab645a4b0db508ceeb4004bb10b060e"}, + {file = "pandas-2.0.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:2b6fe5f7ce1cba0e74188c8473c9091ead9b293ef0a6794939f8cc7947057abd"}, + {file = "pandas-2.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:00959a04a1d7bbc63d75a768540fb20ecc9e65fd80744c930e23768345a362a7"}, + {file = "pandas-2.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:af2449e9e984dfad39276b885271ba31c5e0204ffd9f21f287a245980b0e4091"}, + {file = "pandas-2.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:910df06feaf9935d05247db6de452f6d59820e432c18a2919a92ffcd98f8f79b"}, + {file = "pandas-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fa0067f2419f933101bdc6001bcea1d50812afbd367b30943417d67fbb99678"}, + {file = "pandas-2.0.1-cp311-cp311-win32.whl", hash = "sha256:7b8395d335b08bc8b050590da264f94a439b4770ff16bb51798527f1dd840388"}, + {file = "pandas-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:8db5a644d184a38e6ed40feeb12d410d7fcc36648443defe4707022da127fc35"}, + {file = "pandas-2.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7bbf173d364130334e0159a9a034f573e8b44a05320995127cf676b85fd8ce86"}, + {file = "pandas-2.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6c0853d487b6c868bf107a4b270a823746175b1932093b537b9b76c639fc6f7e"}, + {file = "pandas-2.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25e23a03f7ad7211ffa30cb181c3e5f6d96a8e4cb22898af462a7333f8a74eb"}, + {file = "pandas-2.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e09a53a4fe8d6ae2149959a2d02e1ef2f4d2ceb285ac48f74b79798507e468b4"}, + {file = "pandas-2.0.1-cp38-cp38-win32.whl", hash = "sha256:a2564629b3a47b6aa303e024e3d84e850d36746f7e804347f64229f8c87416ea"}, + {file = "pandas-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:03e677c6bc9cfb7f93a8b617d44f6091613a5671ef2944818469be7b42114a00"}, + {file = "pandas-2.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3d099ecaa5b9e977b55cd43cf842ec13b14afa1cfa51b7e1179d90b38c53ce6a"}, + {file = "pandas-2.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a37ee35a3eb6ce523b2c064af6286c45ea1c7ff882d46e10d0945dbda7572753"}, + {file = "pandas-2.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:320b180d125c3842c5da5889183b9a43da4ebba375ab2ef938f57bf267a3c684"}, + {file = "pandas-2.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18d22cb9043b6c6804529810f492ab09d638ddf625c5dea8529239607295cb59"}, + {file = "pandas-2.0.1-cp39-cp39-win32.whl", hash = "sha256:90d1d365d77d287063c5e339f49b27bd99ef06d10a8843cf00b1a49326d492c1"}, + {file = "pandas-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:99f7192d8b0e6daf8e0d0fd93baa40056684e4b4aaaef9ea78dff34168e1f2f0"}, + {file = "pandas-2.0.1.tar.gz", hash = "sha256:19b8e5270da32b41ebf12f0e7165efa7024492e9513fb46fb631c5022ae5709d"}, ] [package.dependencies] @@ -2932,14 +2892,14 @@ testing = ["docopt", "pytest (<6.0.0)"] [[package]] name = "partd" -version = "1.3.0" +version = "1.4.0" description = "Appendable key-value storage" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "partd-1.3.0-py3-none-any.whl", hash = "sha256:6393a0c898a0ad945728e34e52de0df3ae295c5aff2e2926ba7cc3c60a734a15"}, - {file = "partd-1.3.0.tar.gz", hash = "sha256:ce91abcdc6178d668bcaa431791a5a917d902341cb193f543fe445d494660485"}, + {file = "partd-1.4.0-py3-none-any.whl", hash = "sha256:7a63529348cf0dff14b986db641cd1b83c16b5cb9fc647c2851779db03282ef8"}, + {file = "partd-1.4.0.tar.gz", hash = 
"sha256:aa0ff35dbbcc807ae374db56332f4c1b39b46f67bf2975f5151e0b4186aed0d5"}, ] [package.dependencies] @@ -3101,30 +3061,30 @@ files = [ [[package]] name = "platformdirs" -version = "3.2.0" +version = "3.5.1" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "platformdirs-3.2.0-py3-none-any.whl", hash = "sha256:ebe11c0d7a805086e99506aa331612429a72ca7cd52a1f0d277dc4adc20cb10e"}, - {file = "platformdirs-3.2.0.tar.gz", hash = "sha256:d5b638ca397f25f979350ff789db335903d7ea010ab28903f57b27e1b16c2b08"}, + {file = "platformdirs-3.5.1-py3-none-any.whl", hash = "sha256:e2378146f1964972c03c085bb5662ae80b2b8c06226c54b2ff4aa9483e8a13a5"}, + {file = "platformdirs-3.5.1.tar.gz", hash = "sha256:412dae91f52a6f84830f39a8078cecd0e866cb72294a5c66808e74d5e88d251f"}, ] [package.extras] -docs = ["furo (>=2022.12.7)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.22,!=1.23.4)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.2.2)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] +docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.2.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"] [[package]] name = "plotly" -version = "5.14.0" +version = "5.14.1" description = "An open-source, interactive data visualization library for Python" category = "main" optional = false python-versions = ">=3.6" files = [ - {file = "plotly-5.14.0-py2.py3-none-any.whl", hash = "sha256:2e3407d93a9700beebbef66d11f63992c58e058dd808442ee54af40f98fb4940"}, - {file = "plotly-5.14.0.tar.gz", hash = "sha256:02e40264f145e524d9628fd516031976b60d74a33bbabce037ea28580bcd4e0c"}, + {file = "plotly-5.14.1-py2.py3-none-any.whl", hash = "sha256:a63f3ad9e4cc2e02902a738e5e3e7f3d1307f2732ac71a6c28f1238ed3052826"}, + {file = "plotly-5.14.1.tar.gz", hash = "sha256:bcac86d7fcba3eff7260c1eddc36ca34dae2aded10a0709808446565e0e53b93"}, ] [package.dependencies] @@ -3149,14 +3109,14 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "pre-commit" -version = "3.2.1" +version = "3.3.1" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
category = "dev" optional = false python-versions = ">=3.8" files = [ - {file = "pre_commit-3.2.1-py2.py3-none-any.whl", hash = "sha256:a06a7fcce7f420047a71213c175714216498b49ebc81fe106f7716ca265f5bb6"}, - {file = "pre_commit-3.2.1.tar.gz", hash = "sha256:b5aee7d75dbba21ee161ba641b01e7ae10c5b91967ebf7b2ab0dfae12d07e1f1"}, + {file = "pre_commit-3.3.1-py2.py3-none-any.whl", hash = "sha256:218e9e3f7f7f3271ebc355a15598a4d3893ad9fc7b57fe446db75644543323b9"}, + {file = "pre_commit-3.3.1.tar.gz", hash = "sha256:733f78c9a056cdd169baa6cd4272d51ecfda95346ef8a89bf93712706021b907"}, ] [package.dependencies] @@ -3198,49 +3158,49 @@ wcwidth = "*" [[package]] name = "protobuf" -version = "4.22.1" +version = "4.23.1" description = "" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "protobuf-4.22.1-cp310-abi3-win32.whl", hash = "sha256:85aa9acc5a777adc0c21b449dafbc40d9a0b6413ff3a4f77ef9df194be7f975b"}, - {file = "protobuf-4.22.1-cp310-abi3-win_amd64.whl", hash = "sha256:8bc971d76c03f1dd49f18115b002254f2ddb2d4b143c583bb860b796bb0d399e"}, - {file = "protobuf-4.22.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:5917412347e1da08ce2939eb5cd60650dfb1a9ab4606a415b9278a1041fb4d19"}, - {file = "protobuf-4.22.1-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:9e12e2810e7d297dbce3c129ae5e912ffd94240b050d33f9ecf023f35563b14f"}, - {file = "protobuf-4.22.1-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:953fc7904ef46900262a26374b28c2864610b60cdc8b272f864e22143f8373c4"}, - {file = "protobuf-4.22.1-cp37-cp37m-win32.whl", hash = "sha256:6e100f7bc787cd0a0ae58dbf0ab8bbf1ee7953f862b89148b6cf5436d5e9eaa1"}, - {file = "protobuf-4.22.1-cp37-cp37m-win_amd64.whl", hash = "sha256:87a6393fa634f294bf24d1cfe9fdd6bb605cbc247af81b9b10c4c0f12dfce4b3"}, - {file = "protobuf-4.22.1-cp38-cp38-win32.whl", hash = "sha256:e3fb58076bdb550e75db06ace2a8b3879d4c4f7ec9dd86e4254656118f4a78d7"}, - {file = "protobuf-4.22.1-cp38-cp38-win_amd64.whl", hash = "sha256:651113695bc2e5678b799ee5d906b5d3613f4ccfa61b12252cfceb6404558af0"}, - {file = "protobuf-4.22.1-cp39-cp39-win32.whl", hash = "sha256:67b7d19da0fda2733702c2299fd1ef6cb4b3d99f09263eacaf1aa151d9d05f02"}, - {file = "protobuf-4.22.1-cp39-cp39-win_amd64.whl", hash = "sha256:b8700792f88e59ccecfa246fa48f689d6eee6900eddd486cdae908ff706c482b"}, - {file = "protobuf-4.22.1-py3-none-any.whl", hash = "sha256:3e19dcf4adbf608924d3486ece469dd4f4f2cf7d2649900f0efcd1a84e8fd3ba"}, - {file = "protobuf-4.22.1.tar.gz", hash = "sha256:dce7a55d501c31ecf688adb2f6c3f763cf11bc0be815d1946a84d74772ab07a7"}, + {file = "protobuf-4.23.1-cp310-abi3-win32.whl", hash = "sha256:410bcc0a5b279f634d3e16082ce221dfef7c3392fac723500e2e64d1806dd2be"}, + {file = "protobuf-4.23.1-cp310-abi3-win_amd64.whl", hash = "sha256:32e78beda26d7a101fecf15d7a4a792278a0d26a31bc327ff05564a9d68ab8ee"}, + {file = "protobuf-4.23.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:f9510cac91e764e86acd74e2b7f7bc5e6127a7f3fb646d7c8033cfb84fd1176a"}, + {file = "protobuf-4.23.1-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:346990f634272caac1f09efbcfbbacb23098b1f606d172534c6fa2d9758bb436"}, + {file = "protobuf-4.23.1-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:3ce113b3f3362493bddc9069c2163a38f240a9ed685ff83e7bcb756b05e1deb0"}, + {file = "protobuf-4.23.1-cp37-cp37m-win32.whl", hash = "sha256:2036a3a1e7fc27f973fa0a7888dce712393af644f4695385f117886abc792e39"}, + {file = "protobuf-4.23.1-cp37-cp37m-win_amd64.whl", hash = "sha256:3b8905eafe4439076e1f58e9d1fa327025fd2777cf90f14083092ae47f77b0aa"}, + 
{file = "protobuf-4.23.1-cp38-cp38-win32.whl", hash = "sha256:5b9cd6097e6acae48a68cb29b56bc79339be84eca65b486910bb1e7a30e2b7c1"}, + {file = "protobuf-4.23.1-cp38-cp38-win_amd64.whl", hash = "sha256:decf119d54e820f298ee6d89c72d6b289ea240c32c521f00433f9dc420595f38"}, + {file = "protobuf-4.23.1-cp39-cp39-win32.whl", hash = "sha256:91fac0753c3c4951fbb98a93271c43cc7cf3b93cf67747b3e600bb1e5cc14d61"}, + {file = "protobuf-4.23.1-cp39-cp39-win_amd64.whl", hash = "sha256:ac50be82491369a9ec3710565777e4da87c6d2e20404e0abb1f3a8f10ffd20f0"}, + {file = "protobuf-4.23.1-py3-none-any.whl", hash = "sha256:65f0ac96ef67d7dd09b19a46aad81a851b6f85f89725577f16de38f2d68ad477"}, + {file = "protobuf-4.23.1.tar.gz", hash = "sha256:95789b569418a3e32a53f43d7763be3d490a831e9c08042539462b6d972c2d7e"}, ] [[package]] name = "psutil" -version = "5.9.4" +version = "5.9.5" description = "Cross-platform lib for process and system monitoring in Python." category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ - {file = "psutil-5.9.4-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:c1ca331af862803a42677c120aff8a814a804e09832f166f226bfd22b56feee8"}, - {file = "psutil-5.9.4-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:68908971daf802203f3d37e78d3f8831b6d1014864d7a85937941bb35f09aefe"}, - {file = "psutil-5.9.4-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:3ff89f9b835100a825b14c2808a106b6fdcc4b15483141482a12c725e7f78549"}, - {file = "psutil-5.9.4-cp27-cp27m-win32.whl", hash = "sha256:852dd5d9f8a47169fe62fd4a971aa07859476c2ba22c2254d4a1baa4e10b95ad"}, - {file = "psutil-5.9.4-cp27-cp27m-win_amd64.whl", hash = "sha256:9120cd39dca5c5e1c54b59a41d205023d436799b1c8c4d3ff71af18535728e94"}, - {file = "psutil-5.9.4-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:6b92c532979bafc2df23ddc785ed116fced1f492ad90a6830cf24f4d1ea27d24"}, - {file = "psutil-5.9.4-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:efeae04f9516907be44904cc7ce08defb6b665128992a56957abc9b61dca94b7"}, - {file = "psutil-5.9.4-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:54d5b184728298f2ca8567bf83c422b706200bcbbfafdc06718264f9393cfeb7"}, - {file = "psutil-5.9.4-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16653106f3b59386ffe10e0bad3bb6299e169d5327d3f187614b1cb8f24cf2e1"}, - {file = "psutil-5.9.4-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54c0d3d8e0078b7666984e11b12b88af2db11d11249a8ac8920dd5ef68a66e08"}, - {file = "psutil-5.9.4-cp36-abi3-win32.whl", hash = "sha256:149555f59a69b33f056ba1c4eb22bb7bf24332ce631c44a319cec09f876aaeff"}, - {file = "psutil-5.9.4-cp36-abi3-win_amd64.whl", hash = "sha256:fd8522436a6ada7b4aad6638662966de0d61d241cb821239b2ae7013d41a43d4"}, - {file = "psutil-5.9.4-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:6001c809253a29599bc0dfd5179d9f8a5779f9dffea1da0f13c53ee568115e1e"}, - {file = "psutil-5.9.4.tar.gz", hash = "sha256:3d7f9739eb435d4b1338944abe23f49584bde5395f27487d2ee25ad9a8774a62"}, + {file = "psutil-5.9.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:be8929ce4313f9f8146caad4272f6abb8bf99fc6cf59344a3167ecd74f4f203f"}, + {file = "psutil-5.9.5-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ab8ed1a1d77c95453db1ae00a3f9c50227ebd955437bcf2a574ba8adbf6a74d5"}, + {file = "psutil-5.9.5-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:4aef137f3345082a3d3232187aeb4ac4ef959ba3d7c10c33dd73763fbc063da4"}, + {file = 
"psutil-5.9.5-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:ea8518d152174e1249c4f2a1c89e3e6065941df2fa13a1ab45327716a23c2b48"}, + {file = "psutil-5.9.5-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:acf2aef9391710afded549ff602b5887d7a2349831ae4c26be7c807c0a39fac4"}, + {file = "psutil-5.9.5-cp27-none-win32.whl", hash = "sha256:5b9b8cb93f507e8dbaf22af6a2fd0ccbe8244bf30b1baad6b3954e935157ae3f"}, + {file = "psutil-5.9.5-cp27-none-win_amd64.whl", hash = "sha256:8c5f7c5a052d1d567db4ddd231a9d27a74e8e4a9c3f44b1032762bd7b9fdcd42"}, + {file = "psutil-5.9.5-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:3c6f686f4225553615612f6d9bc21f1c0e305f75d7d8454f9b46e901778e7217"}, + {file = "psutil-5.9.5-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a7dd9997128a0d928ed4fb2c2d57e5102bb6089027939f3b722f3a210f9a8da"}, + {file = "psutil-5.9.5-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89518112647f1276b03ca97b65cc7f64ca587b1eb0278383017c2a0dcc26cbe4"}, + {file = "psutil-5.9.5-cp36-abi3-win32.whl", hash = "sha256:104a5cc0e31baa2bcf67900be36acde157756b9c44017b86b2c049f11957887d"}, + {file = "psutil-5.9.5-cp36-abi3-win_amd64.whl", hash = "sha256:b258c0c1c9d145a1d5ceffab1134441c4c5113b2417fafff7315a917a026c3c9"}, + {file = "psutil-5.9.5-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:c607bb3b57dc779d55e1554846352b4e358c10fff3abf3514a7a6601beebdb30"}, + {file = "psutil-5.9.5.tar.gz", hash = "sha256:5410638e4df39c54d957fc51ce03048acd8e6d60abc0f5107af51e5fb566eb3c"}, ] [package.extras] @@ -3275,30 +3235,30 @@ tests = ["pytest"] [[package]] name = "pyasn1" -version = "0.4.8" -description = "ASN.1 types and codecs" +version = "0.5.0" +description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" category = "main" optional = false -python-versions = "*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ - {file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"}, - {file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"}, + {file = "pyasn1-0.5.0-py2.py3-none-any.whl", hash = "sha256:87a2121042a1ac9358cabcaf1d07680ff97ee6404333bacca15f76aa8ad01a57"}, + {file = "pyasn1-0.5.0.tar.gz", hash = "sha256:97b7290ca68e62a832558ec3976f15cbf911bf5d7c7039d8b861c2a0ece69fde"}, ] [[package]] name = "pyasn1-modules" -version = "0.2.8" -description = "A collection of ASN.1-based protocols modules." 
+version = "0.3.0" +description = "A collection of ASN.1-based protocols modules" category = "main" optional = false -python-versions = "*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ - {file = "pyasn1-modules-0.2.8.tar.gz", hash = "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e"}, - {file = "pyasn1_modules-0.2.8-py2.py3-none-any.whl", hash = "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74"}, + {file = "pyasn1_modules-0.3.0-py2.py3-none-any.whl", hash = "sha256:d3ccd6ed470d9ffbc716be08bd90efbd44d0734bc9303818f7336070984a162d"}, + {file = "pyasn1_modules-0.3.0.tar.gz", hash = "sha256:5bd01446b736eb9d31512a30d46c1ac3395d676c6f3cafa4c03eb54b9925631c"}, ] [package.dependencies] -pyasn1 = ">=0.4.6,<0.5.0" +pyasn1 = ">=0.4.6,<0.6.0" [[package]] name = "pycparser" @@ -3314,14 +3274,14 @@ files = [ [[package]] name = "pygments" -version = "2.14.0" +version = "2.15.1" description = "Pygments is a syntax highlighting package written in Python." category = "main" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "Pygments-2.14.0-py3-none-any.whl", hash = "sha256:fa7bd7bd2771287c0de303af8bfdfc731f51bd2c6a47ab69d117138893b82717"}, - {file = "Pygments-2.14.0.tar.gz", hash = "sha256:b3ed06a9e8ac9a9aae5a6f5dbe78a8a58655d17b43b93c078f094ddc476ae297"}, + {file = "Pygments-2.15.1-py3-none-any.whl", hash = "sha256:db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1"}, + {file = "Pygments-2.15.1.tar.gz", hash = "sha256:8ace4d3c1dd481894b2005f560ead0f9f19ee64fe983366be1a21e171d12775c"}, ] [package.extras] @@ -3366,18 +3326,17 @@ files = [ [[package]] name = "pytest" -version = "7.2.2" +version = "7.3.1" description = "pytest: simple powerful testing with Python" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "pytest-7.2.2-py3-none-any.whl", hash = "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e"}, - {file = "pytest-7.2.2.tar.gz", hash = "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4"}, + {file = "pytest-7.3.1-py3-none-any.whl", hash = "sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362"}, + {file = "pytest-7.3.1.tar.gz", hash = "sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"}, ] [package.dependencies] -attrs = ">=19.2.0" colorama = {version = "*", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" @@ -3386,7 +3345,7 @@ pluggy = ">=0.12,<2.0" tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] -testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] [[package]] name = "pytest-cases" @@ -3425,14 +3384,14 @@ testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtuale [[package]] name = "pytest-xdist" -version = "3.2.1" +version = "3.3.0" description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "pytest-xdist-3.2.1.tar.gz", hash = "sha256:1849bd98d8b242b948e472db7478e090bf3361912a8fed87992ed94085f54727"}, - {file = "pytest_xdist-3.2.1-py3-none-any.whl", hash = 
"sha256:37290d161638a20b672401deef1cba812d110ac27e35d213f091d15b8beb40c9"}, + {file = "pytest-xdist-3.3.0.tar.gz", hash = "sha256:d42c9efb388da35480878ef4b2993704c6cea800c8bafbe85a8cdc461baf0748"}, + {file = "pytest_xdist-3.3.0-py3-none-any.whl", hash = "sha256:76f7683d4f993eaff91c9cb0882de0465c4af9c6dd3debc903833484041edc1a"}, ] [package.dependencies] @@ -3665,21 +3624,21 @@ cffi = {version = "*", markers = "implementation_name == \"pypy\""} [[package]] name = "requests" -version = "2.28.2" +version = "2.30.0" description = "Python HTTP for Humans." category = "main" optional = false -python-versions = ">=3.7, <4" +python-versions = ">=3.7" files = [ - {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, - {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"}, + {file = "requests-2.30.0-py3-none-any.whl", hash = "sha256:10e94cc4f3121ee6da529d358cdaeaff2f1c409cd377dbc72b825852f2f7e294"}, + {file = "requests-2.30.0.tar.gz", hash = "sha256:239d7d4458afcb28a692cdd298d87542235f4ca8d36d03a15bfc128a6559a2f4"}, ] [package.dependencies] certifi = ">=2017.4.17" charset-normalizer = ">=2,<4" idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<1.27" +urllib3 = ">=1.21.1,<3" [package.extras] socks = ["PySocks (>=1.5.6,!=1.5.7)"] @@ -3763,29 +3722,29 @@ pyasn1 = ">=0.1.3" [[package]] name = "scikit-allel" -version = "1.3.5" +version = "1.3.6" description = "A Python package for exploring and analysing genetic variation data." category = "main" optional = false python-versions = "*" files = [ - {file = "scikit-allel-1.3.5.tar.gz", hash = "sha256:f2a1fdcd00880fbea6e0288dee5b48b4bcd8730b3fa631ff2b27b4679e45e8ed"}, - {file = "scikit_allel-1.3.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:6150cda81d0b3d09767ea27f7344c0eb6e02ca1d1d64d6b930511d2758145ecb"}, - {file = "scikit_allel-1.3.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:437b888ea48255fce788e4bc3c717d33c00125f60ec1c727399959464028793b"}, - {file = "scikit_allel-1.3.5-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:cc02bdbaf252d360362496c7cffb69fd8e261f1397c03bf524b9ad37031da123"}, - {file = "scikit_allel-1.3.5-cp36-cp36m-win_amd64.whl", hash = "sha256:befa08cf2ecdbc3b256570007e846643ecf3838050425696ec502cf6591ac8e9"}, - {file = "scikit_allel-1.3.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b3249db4e557820bcf8cb1b912e9cd4000c8ab02eec7d90b3da8ca0d00e887bc"}, - {file = "scikit_allel-1.3.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:c615c825edc19f40e0e413db34f90dee50fead46bc7ed380c89098619ca2bff7"}, - {file = "scikit_allel-1.3.5-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:21c10744d306b36567075f738b2549ce6d20994ab16d8162f70f2278a6db84ba"}, - {file = "scikit_allel-1.3.5-cp37-cp37m-win_amd64.whl", hash = "sha256:52d9f17b74e204c0d6a01fc16b03d3d9d95feae9ec147b90c189c4f344e290e2"}, - {file = "scikit_allel-1.3.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5b031c79139d1cc793771692108440a72052d834065cf9d45375c6917cee8988"}, - {file = "scikit_allel-1.3.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:680485ae4bc9fa753740055070fa2da39a8875023e549da3946d9bd4372353f6"}, - {file = "scikit_allel-1.3.5-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ebb52e7f07bf35aef42879883f2fe568855242c5cfc0154c0925444fe51f0592"}, - {file = "scikit_allel-1.3.5-cp38-cp38-win_amd64.whl", hash = "sha256:8658eef2306702338852dec10d96e9ef165ca6c54708c6356ecffc51e0f4b136"}, - {file = 
"scikit_allel-1.3.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3a7fb5cb460dfb532b41c99f3d004b3533f9d6353f9d2a24711f1a7a224f674c"}, - {file = "scikit_allel-1.3.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:9a222c684868fb2cd91164ae472a4b78912c00bfc5eb84876f84635f829251c4"}, - {file = "scikit_allel-1.3.5-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:f951db48bdaa94df437c82d23dbe0ebdbd94e54890e5aa4cb4c4b11743b10795"}, - {file = "scikit_allel-1.3.5-cp39-cp39-win_amd64.whl", hash = "sha256:5eafb694faa891668934cc0402af91e3b358c7202ee9ad554b844bc5049f6b12"}, + {file = "scikit-allel-1.3.6.tar.gz", hash = "sha256:b41e6cc8afa97d4aa9e4d4671bdef7bcd00d15f0af00374d678bb5e512bdd453"}, + {file = "scikit_allel-1.3.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c2fffdc9e8057c470e9e6bcf3f6d9ea923a5f7a45feda71dbf060b9b577580c6"}, + {file = "scikit_allel-1.3.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cca7e799c3c4eb7f5fda8c9cebc783f27f595f0f80e7fed3c65a0a94d4c5f82c"}, + {file = "scikit_allel-1.3.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bd35e811ff0b9a9cb8dd55c1bfa9034a4646fc1d73f2c2d6d4034aaf19dc54c2"}, + {file = "scikit_allel-1.3.6-cp310-cp310-win_amd64.whl", hash = "sha256:517d43683400918a274375f743855884e0cb5906ec3ffa29308e354d21f45e4d"}, + {file = "scikit_allel-1.3.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0d8d02628a422aa4fa8c724af68cd2068dba83d869a0cb008295a11ffd5c8216"}, + {file = "scikit_allel-1.3.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:621c8117b204847077b8bea59042a001a576e5a3d6ebe1bffa49501ee28e545e"}, + {file = "scikit_allel-1.3.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3f1f7328804d0f3a1c5e874c6463a825b42ed1eda966c2571af8910a5c00943c"}, + {file = "scikit_allel-1.3.6-cp311-cp311-win_amd64.whl", hash = "sha256:90da7e2a335f7b66184204ae55c42d78ea44e2188267a728716370e88735adb9"}, + {file = "scikit_allel-1.3.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5f70128ab38eb815b239c1381fa9b5be12e37ac22fa0eee04074dc506ffe9146"}, + {file = "scikit_allel-1.3.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da1dd0f427935414e0b73e4bd4d3580bf1866f49cd7fc564e5a405b372e6906b"}, + {file = "scikit_allel-1.3.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:9739443c47cf52cedaacb694f5325c922aad095af4906af61b0cfb6e1f54a1a6"}, + {file = "scikit_allel-1.3.6-cp38-cp38-win_amd64.whl", hash = "sha256:5276c45fae28c2d2108962a80bb27109adfc7cc349382b5c9156ab14731fcdad"}, + {file = "scikit_allel-1.3.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d1e02527da69abe413f0da78f9aef5c50fdf88be3a4a5b7f13f2530ae4b2c861"}, + {file = "scikit_allel-1.3.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3889fdbba19352fd32a807d8a4d82080dfc04b6182ada330341ef95a0361da4e"}, + {file = "scikit_allel-1.3.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d8536a51b2a27eeac595a6850c0d29fd1ef15a6bf361849b65e4c569c686f211"}, + {file = "scikit_allel-1.3.6-cp39-cp39-win_amd64.whl", hash = "sha256:9bd1bf4db229e3d0b17be6c9ab9bc1e199139ab5f6a695d718175d9533e960ed"}, ] [package.dependencies] @@ -3795,42 +3754,6 @@ numpy = "*" [package.extras] full = ["h5py", "hmmlearn", "matplotlib", "nose", "numexpr", "pandas", "pomegranate", "scikit-learn", "scipy", "seaborn", "zarr"] -[[package]] -name = "scipy" -version = "1.8.1" -description = "SciPy: Scientific Library for Python" -category = "main" -optional = false -python-versions = ">=3.8,<3.11" -files = [ - {file = 
"scipy-1.8.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:65b77f20202599c51eb2771d11a6b899b97989159b7975e9b5259594f1d35ef4"}, - {file = "scipy-1.8.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e013aed00ed776d790be4cb32826adb72799c61e318676172495383ba4570aa4"}, - {file = "scipy-1.8.1-cp310-cp310-macosx_12_0_universal2.macosx_10_9_x86_64.whl", hash = "sha256:02b567e722d62bddd4ac253dafb01ce7ed8742cf8031aea030a41414b86c1125"}, - {file = "scipy-1.8.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1da52b45ce1a24a4a22db6c157c38b39885a990a566748fc904ec9f03ed8c6ba"}, - {file = "scipy-1.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0aa8220b89b2e3748a2836fbfa116194378910f1a6e78e4675a095bcd2c762d"}, - {file = "scipy-1.8.1-cp310-cp310-win_amd64.whl", hash = "sha256:4e53a55f6a4f22de01ffe1d2f016e30adedb67a699a310cdcac312806807ca81"}, - {file = "scipy-1.8.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:28d2cab0c6ac5aa131cc5071a3a1d8e1366dad82288d9ec2ca44df78fb50e649"}, - {file = "scipy-1.8.1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:6311e3ae9cc75f77c33076cb2794fb0606f14c8f1b1c9ff8ce6005ba2c283621"}, - {file = "scipy-1.8.1-cp38-cp38-macosx_12_0_universal2.macosx_10_9_x86_64.whl", hash = "sha256:3b69b90c9419884efeffaac2c38376d6ef566e6e730a231e15722b0ab58f0328"}, - {file = "scipy-1.8.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6cc6b33139eb63f30725d5f7fa175763dc2df6a8f38ddf8df971f7c345b652dc"}, - {file = "scipy-1.8.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c4e3ae8a716c8b3151e16c05edb1daf4cb4d866caa385e861556aff41300c14"}, - {file = "scipy-1.8.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23b22fbeef3807966ea42d8163322366dd89da9bebdc075da7034cee3a1441ca"}, - {file = "scipy-1.8.1-cp38-cp38-win32.whl", hash = "sha256:4b93ec6f4c3c4d041b26b5f179a6aab8f5045423117ae7a45ba9710301d7e462"}, - {file = "scipy-1.8.1-cp38-cp38-win_amd64.whl", hash = "sha256:70ebc84134cf0c504ce6a5f12d6db92cb2a8a53a49437a6bb4edca0bc101f11c"}, - {file = "scipy-1.8.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f3e7a8867f307e3359cc0ed2c63b61a1e33a19080f92fe377bc7d49f646f2ec1"}, - {file = "scipy-1.8.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:2ef0fbc8bcf102c1998c1f16f15befe7cffba90895d6e84861cd6c6a33fb54f6"}, - {file = "scipy-1.8.1-cp39-cp39-macosx_12_0_universal2.macosx_10_9_x86_64.whl", hash = "sha256:83606129247e7610b58d0e1e93d2c5133959e9cf93555d3c27e536892f1ba1f2"}, - {file = "scipy-1.8.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:93d07494a8900d55492401917a119948ed330b8c3f1d700e0b904a578f10ead4"}, - {file = "scipy-1.8.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3b3c8924252caaffc54d4a99f1360aeec001e61267595561089f8b5900821bb"}, - {file = "scipy-1.8.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70de2f11bf64ca9921fda018864c78af7147025e467ce9f4a11bc877266900a6"}, - {file = "scipy-1.8.1-cp39-cp39-win32.whl", hash = "sha256:1166514aa3bbf04cb5941027c6e294a000bba0cf00f5cdac6c77f2dad479b434"}, - {file = "scipy-1.8.1-cp39-cp39-win_amd64.whl", hash = "sha256:9dd4012ac599a1e7eb63c114d1eee1bcfc6dc75a29b589ff0ad0bb3d9412034f"}, - {file = "scipy-1.8.1.tar.gz", hash = "sha256:9e3fb1b0e896f14a85aa9a28d5f755daaeeb54c897b746df7a55ccb02b340f33"}, -] - -[package.dependencies] -numpy = ">=1.17.3,<1.25.0" - [[package]] name = "scipy" version = "1.10.1" @@ -3872,14 +3795,14 @@ 
test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeo [[package]] name = "send2trash" -version = "1.8.0" -description = "Send file to trash natively under Mac OS X, Windows and Linux." +version = "1.8.2" +description = "Send file to trash natively under Mac OS X, Windows and Linux" category = "dev" optional = false -python-versions = "*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" files = [ - {file = "Send2Trash-1.8.0-py3-none-any.whl", hash = "sha256:f20eaadfdb517eaca5ce077640cb261c7d2698385a6a0f072a4a5447fd49fa08"}, - {file = "Send2Trash-1.8.0.tar.gz", hash = "sha256:d2c24762fd3759860a0aff155e45871447ea58d2be6bdd39b5c8f966a0c99c2d"}, + {file = "Send2Trash-1.8.2-py3-none-any.whl", hash = "sha256:a384719d99c07ce1eefd6905d2decb6f8b7ed054025bb0e618919f945de4f679"}, + {file = "Send2Trash-1.8.2.tar.gz", hash = "sha256:c132d59fa44b9ca2b1699af5c86f57ce9f4c5eb56629d5d55fbb7a35f84e2312"}, ] [package.extras] @@ -3889,14 +3812,14 @@ win32 = ["pywin32"] [[package]] name = "setuptools" -version = "67.6.1" +version = "67.7.2" description = "Easily download, build, install, upgrade, and uninstall Python packages" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "setuptools-67.6.1-py3-none-any.whl", hash = "sha256:e728ca814a823bf7bf60162daf9db95b93d532948c4c0bea762ce62f60189078"}, - {file = "setuptools-67.6.1.tar.gz", hash = "sha256:257de92a9d50a60b8e22abfcbb771571fde0dbf3ec234463212027a4eeecbe9a"}, + {file = "setuptools-67.7.2-py3-none-any.whl", hash = "sha256:23aaf86b85ca52ceb801d32703f12d77517b2556af839621c641fca11287952b"}, + {file = "setuptools-67.7.2.tar.gz", hash = "sha256:f104fa03692a2602fa0fec6c6a9e63b6c8a968de13e17c026957dd1f53d80990"}, ] [package.extras] @@ -3918,14 +3841,14 @@ files = [ [[package]] name = "snakeviz" -version = "2.1.1" +version = "2.2.0" description = "A web-based viewer for Python profiler output" category = "dev" optional = false -python-versions = "*" +python-versions = ">=3.7" files = [ - {file = "snakeviz-2.1.1-py2.py3-none-any.whl", hash = "sha256:931142dc927101c9a4b6e89bc0577ff1a3d1886b483a04e6af70c31d2c3dce19"}, - {file = "snakeviz-2.1.1.tar.gz", hash = "sha256:0d96c006304f095cb4b3fb7ed98bb866ca35a7ca4ab9020bbc27d295ee4c94d9"}, + {file = "snakeviz-2.2.0-py2.py3-none-any.whl", hash = "sha256:569e2d71c47f80a886aa6e70d6405cb6d30aa3520969ad956b06f824c5f02b8e"}, + {file = "snakeviz-2.2.0.tar.gz", hash = "sha256:7bfd00be7ae147eb4a170a471578e1cd3f41f803238958b6b8efcf2c698a6aa9"}, ] [package.dependencies] @@ -3945,14 +3868,14 @@ files = [ [[package]] name = "soupsieve" -version = "2.4" +version = "2.4.1" description = "A modern CSS selector implementation for Beautiful Soup." 
category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "soupsieve-2.4-py3-none-any.whl", hash = "sha256:49e5368c2cda80ee7e84da9dbe3e110b70a4575f196efb74e51b94549d921955"}, - {file = "soupsieve-2.4.tar.gz", hash = "sha256:e28dba9ca6c7c00173e34e4ba57448f0688bb681b7c5e8bf4971daafc093d69a"}, + {file = "soupsieve-2.4.1-py3-none-any.whl", hash = "sha256:1c1bfee6819544a3447586c889157365a27e10d88cde3ad3da0cf0ddf646feb8"}, + {file = "soupsieve-2.4.1.tar.gz", hash = "sha256:89d12b2d5dfcd2c9e8c22326da9d9aa9cb3dfab0a83a024f05704076ee8d35ea"}, ] [[package]] @@ -3977,55 +3900,48 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] [[package]] name = "statsmodels" -version = "0.13.5" +version = "0.14.0" description = "Statistical computations and models for Python" category = "main" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "statsmodels-0.13.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c75319fddded9507cc310fc3980e4ae4d64e3ff37b322ad5e203a84f89d85203"}, - {file = "statsmodels-0.13.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f148920ef27c7ba69a5735724f65de9422c0c8bcef71b50c846b823ceab8840"}, - {file = "statsmodels-0.13.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cc4d3e866bfe0c4f804bca362d0e7e29d24b840aaba8d35a754387e16d2a119"}, - {file = "statsmodels-0.13.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:072950d6f7820a6b0bd6a27b2d792a6d6f952a1d2f62f0dcf8dd808799475855"}, - {file = "statsmodels-0.13.5-cp310-cp310-win_amd64.whl", hash = "sha256:159ae9962c61b31dcffe6356d72ae3d074bc597ad9273ec93ae653fe607b8516"}, - {file = "statsmodels-0.13.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9061c0d5ee4f3038b590afedd527a925e5de27195dc342381bac7675b2c5efe4"}, - {file = "statsmodels-0.13.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e1d89cba5fafc1bf8e75296fdfad0b619de2bfb5e6c132913991d207f3ead675"}, - {file = "statsmodels-0.13.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01bc16e7c66acb30cd3dda6004c43212c758223d1966131226024a5c99ec5a7e"}, - {file = "statsmodels-0.13.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d5cd9ab5de2c7489b890213cba2aec3d6468eaaec547041c2dfcb1e03411f7e"}, - {file = "statsmodels-0.13.5-cp311-cp311-win_amd64.whl", hash = "sha256:857d5c0564a68a7ef77dc2252bb43c994c0699919b4e1f06a9852c2fbb588765"}, - {file = "statsmodels-0.13.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5a5348b2757ab31c5c31b498f25eff2ea3c42086bef3d3b88847c25a30bdab9c"}, - {file = "statsmodels-0.13.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b21648e3a8e7514839ba000a48e495cdd8bb55f1b71c608cf314b05541e283b"}, - {file = "statsmodels-0.13.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b829eada6cec07990f5e6820a152af4871c601fd458f76a896fb79ae2114985"}, - {file = "statsmodels-0.13.5-cp37-cp37m-win_amd64.whl", hash = "sha256:872b3a8186ef20f647c7ab5ace512a8fc050148f3c2f366460ab359eec3d9695"}, - {file = "statsmodels-0.13.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bc1abb81d24f56425febd5a22bb852a1b98e53b80c4a67f50938f9512f154141"}, - {file = "statsmodels-0.13.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a2c46f1b0811a9736db37badeb102c0903f33bec80145ced3aa54df61aee5c2b"}, - {file = "statsmodels-0.13.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:947f79ba9662359f1cfa6e943851f17f72b06e55f4a7c7a2928ed3bc57ed6cb8"}, - {file = "statsmodels-0.13.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:046251c939c51e7632bcc8c6d6f31b8ca0eaffdf726d2498463f8de3735c9a82"}, - {file = "statsmodels-0.13.5-cp38-cp38-win_amd64.whl", hash = "sha256:84f720e8d611ef8f297e6d2ffa7248764e223ef7221a3fc136e47ae089609611"}, - {file = "statsmodels-0.13.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b0d1d24e4adf96ec3c64d9a027dcee2c5d5096bb0dad33b4d91034c0a3c40371"}, - {file = "statsmodels-0.13.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0f0e5c9c58fb6cba41db01504ec8dd018c96a95152266b7d5d67e0de98840474"}, - {file = "statsmodels-0.13.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b034aa4b9ad4f4d21abc4dd4841be0809a446db14c7aa5c8a65090aea9f1143"}, - {file = "statsmodels-0.13.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73f97565c29241e839ffcef74fa995afdfe781910ccc27c189e5890193085958"}, - {file = "statsmodels-0.13.5-cp39-cp39-win_amd64.whl", hash = "sha256:2ff331e508f2d1a53d3a188305477f4cf05cd8c52beb6483885eb3d51c8be3ad"}, - {file = "statsmodels-0.13.5.tar.gz", hash = "sha256:593526acae1c0fda0ea6c48439f67c3943094c542fe769f8b90fe9e6c6cc4871"}, + {file = "statsmodels-0.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:16bfe0c96a53b20fa19067e3b6bd2f1d39e30d4891ea0d7bc20734a0ae95942d"}, + {file = "statsmodels-0.14.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5a6a0a1a06ff79be8aa89c8494b33903442859add133f0dda1daf37c3c71682e"}, + {file = "statsmodels-0.14.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77b3cd3a5268ef966a0a08582c591bd29c09c88b4566c892a7c087935234f285"}, + {file = "statsmodels-0.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c64ebe9cf376cba0c31aed138e15ed179a1d128612dd241cdf299d159e5e882"}, + {file = "statsmodels-0.14.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb471f757fc45102a87e5d86e87dc2c8c78b34ad4f203679a46520f1d863b9da"}, + {file = "statsmodels-0.14.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:582f9e41092e342aaa04920d17cc3f97240e3ee198672f194719b5a3d08657d6"}, + {file = "statsmodels-0.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7ebe885ccaa64b4bc5ad49ac781c246e7a594b491f08ab4cfd5aa456c363a6f6"}, + {file = "statsmodels-0.14.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b587ee5d23369a0e881da6e37f78371dce4238cf7638a455db4b633a1a1c62d6"}, + {file = "statsmodels-0.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ef7fa4813c7a73b0d8a0c830250f021c102c71c95e9fe0d6877bcfb56d38b8c"}, + {file = "statsmodels-0.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:a6ad7b8aadccd4e4dd7f315a07bef1bca41d194eeaf4ec600d20dea02d242fce"}, + {file = "statsmodels-0.14.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3757542c95247e4ab025291a740efa5da91dc11a05990c033d40fce31c450dc9"}, + {file = "statsmodels-0.14.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:de489e3ed315bdba55c9d1554a2e89faa65d212e365ab81bc323fa52681fc60e"}, + {file = "statsmodels-0.14.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76e290f4718177bffa8823a780f3b882d56dd64ad1c18cfb4bc8b5558f3f5757"}, + {file = "statsmodels-0.14.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71054f9dbcead56def14e3c9db6f66f943110fdfb19713caf0eb0f08c1ec03fd"}, + {file = 
"statsmodels-0.14.0-cp38-cp38-win_amd64.whl", hash = "sha256:d7fda067837df94e0a614d93d3a38fb6868958d37f7f50afe2a534524f2660cb"}, + {file = "statsmodels-0.14.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1c7724ad573af26139a98393ae64bc318d1b19762b13442d96c7a3e793f495c3"}, + {file = "statsmodels-0.14.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3b0a135f3bfdeec987e36e3b3b4c53e0bb87a8d91464d2fcc4d169d176f46fdb"}, + {file = "statsmodels-0.14.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce28eb1c397dba437ec39b9ab18f2101806f388c7a0cf9cdfd8f09294ad1c799"}, + {file = "statsmodels-0.14.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68b1c768dd94cc5ba8398121a632b673c625491aa7ed627b82cb4c880a25563f"}, + {file = "statsmodels-0.14.0-cp39-cp39-win_amd64.whl", hash = "sha256:8d1e3e10dfbfcd58119ba5a4d3c7d519182b970a2aebaf0b6f539f55ae16058d"}, + {file = "statsmodels-0.14.0.tar.gz", hash = "sha256:6875c7d689e966d948f15eb816ab5616f4928706b180cf470fd5907ab6f647a4"}, ] [package.dependencies] numpy = [ - {version = ">=1.17", markers = "python_version != \"3.10\" or platform_system != \"Windows\" or platform_python_implementation == \"PyPy\""}, + {version = ">=1.18", markers = "python_version != \"3.10\" or platform_system != \"Windows\" or platform_python_implementation == \"PyPy\""}, {version = ">=1.22.3", markers = "python_version == \"3.10\" and platform_system == \"Windows\" and platform_python_implementation != \"PyPy\""}, ] packaging = ">=21.3" -pandas = ">=0.25" +pandas = ">=1.0" patsy = ">=0.5.2" -scipy = [ - {version = ">=1.3", markers = "python_version > \"3.9\" and python_version < \"3.12\" or platform_system != \"Windows\" and python_version < \"3.12\" or platform_machine != \"x86\" and python_version < \"3.12\""}, - {version = ">=1.3,<1.9", markers = "python_version == \"3.8\" and platform_system == \"Windows\" and platform_machine == \"x86\" or python_version == \"3.9\" and platform_system == \"Windows\" and platform_machine == \"x86\""}, -] +scipy = ">=1.4,<1.9.2 || >1.9.2" [package.extras] -build = ["cython (>=0.29.32)"] -develop = ["Jinja2", "colorama", "cython (>=0.29.32)", "cython (>=0.29.32,<3.0.0)", "flake8", "isort", "joblib", "matplotlib (>=3)", "oldest-supported-numpy (>=2022.4.18)", "pytest (>=7.0.1,<7.1.0)", "pytest-randomly", "pytest-xdist", "pywinpty", "setuptools-scm[toml] (>=7.0.0,<7.1.0)"] +build = ["cython (>=0.29.26)"] +develop = ["colorama", "cython (>=0.29.26)", "cython (>=0.29.28,<3.0.0)", "flake8", "isort", "joblib", "matplotlib (>=3)", "oldest-supported-numpy (>=2022.4.18)", "pytest (>=7.0.1,<7.1.0)", "pytest-randomly", "pytest-xdist", "pywinpty", "setuptools-scm[toml] (>=7.0.0,<7.1.0)"] docs = ["ipykernel", "jupyter-client", "matplotlib", "nbconvert", "nbformat", "numpydoc", "pandas-datareader", "sphinx"] [[package]] @@ -4109,23 +4025,23 @@ files = [ [[package]] name = "tornado" -version = "6.2" +version = "6.3.2" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
category = "main" optional = false -python-versions = ">= 3.7" +python-versions = ">= 3.8" files = [ - {file = "tornado-6.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:20f638fd8cc85f3cbae3c732326e96addff0a15e22d80f049e00121651e82e72"}, - {file = "tornado-6.2-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:87dcafae3e884462f90c90ecc200defe5e580a7fbbb4365eda7c7c1eb809ebc9"}, - {file = "tornado-6.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba09ef14ca9893954244fd872798b4ccb2367c165946ce2dd7376aebdde8e3ac"}, - {file = "tornado-6.2-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b8150f721c101abdef99073bf66d3903e292d851bee51910839831caba341a75"}, - {file = "tornado-6.2-cp37-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3a2f5999215a3a06a4fc218026cd84c61b8b2b40ac5296a6db1f1451ef04c1e"}, - {file = "tornado-6.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5f8c52d219d4995388119af7ccaa0bcec289535747620116a58d830e7c25d8a8"}, - {file = "tornado-6.2-cp37-abi3-musllinux_1_1_i686.whl", hash = "sha256:6fdfabffd8dfcb6cf887428849d30cf19a3ea34c2c248461e1f7d718ad30b66b"}, - {file = "tornado-6.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca"}, - {file = "tornado-6.2-cp37-abi3-win32.whl", hash = "sha256:5c87076709343557ef8032934ce5f637dbb552efa7b21d08e89ae7619ed0eb23"}, - {file = "tornado-6.2-cp37-abi3-win_amd64.whl", hash = "sha256:e5f923aa6a47e133d1cf87d60700889d7eae68988704e20c75fb2d65677a8e4b"}, - {file = "tornado-6.2.tar.gz", hash = "sha256:9b630419bde84ec666bfd7ea0a4cb2a8a651c2d5cccdbdd1972a0c859dfc3c13"}, + {file = "tornado-6.3.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:c367ab6c0393d71171123ca5515c61ff62fe09024fa6bf299cd1339dc9456829"}, + {file = "tornado-6.3.2-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b46a6ab20f5c7c1cb949c72c1994a4585d2eaa0be4853f50a03b5031e964fc7c"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2de14066c4a38b4ecbbcd55c5cc4b5340eb04f1c5e81da7451ef555859c833f"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05615096845cf50a895026f749195bf0b10b8909f9be672f50b0fe69cba368e4"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b17b1cf5f8354efa3d37c6e28fdfd9c1c1e5122f2cb56dac121ac61baa47cbe"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:29e71c847a35f6e10ca3b5c2990a52ce38b233019d8e858b755ea6ce4dcdd19d"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_i686.whl", hash = "sha256:834ae7540ad3a83199a8da8f9f2d383e3c3d5130a328889e4cc991acc81e87a0"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6a0848f1aea0d196a7c4f6772197cbe2abc4266f836b0aac76947872cd29b411"}, + {file = "tornado-6.3.2-cp38-abi3-win32.whl", hash = "sha256:7efcbcc30b7c654eb6a8c9c9da787a851c18f8ccd4a5a3a95b05c7accfa068d2"}, + {file = "tornado-6.3.2-cp38-abi3-win_amd64.whl", hash = "sha256:0c325e66c8123c606eea33084976c832aa4e766b7dff8aedd7587ea44a604cdf"}, + {file = "tornado-6.3.2.tar.gz", hash = "sha256:4b927c4f19b71e627b13f3db2324e4ae660527143f9e1f2e2fb404f3a187e2ba"}, ] [[package]] @@ -4183,6 +4099,26 @@ traitlets = ">=4.2.2" [package.extras] test = ["numpy", "pandas", "pytest", 
"xarray"] +[[package]] +name = "typeguard" +version = "4.0.0" +description = "Run-time type checker for Python" +category = "main" +optional = false +python-versions = ">=3.7.4" +files = [ + {file = "typeguard-4.0.0-py3-none-any.whl", hash = "sha256:c4a40af0ba8a41077221271b46d0a6d8d46045443e4d887887c69254ca861952"}, + {file = "typeguard-4.0.0.tar.gz", hash = "sha256:194fb3dbcb06ea9caf7088f3befee014de57961689f9c859ac5239b1ef61d987"}, +] + +[package.dependencies] +importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""} +typing-extensions = {version = ">=4.4.0", markers = "python_version < \"3.11\""} + +[package.extras] +doc = ["packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["mypy (>=1.2.0)", "pytest (>=7)"] + [[package]] name = "typing-extensions" version = "4.5.0" @@ -4241,24 +4177,24 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] name = "virtualenv" -version = "20.21.0" +version = "20.23.0" description = "Virtual Python Environment builder" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "virtualenv-20.21.0-py3-none-any.whl", hash = "sha256:31712f8f2a17bd06234fa97fdf19609e789dd4e3e4bf108c3da71d710651adbc"}, - {file = "virtualenv-20.21.0.tar.gz", hash = "sha256:f50e3e60f990a0757c9b68333c9fdaa72d7188caa417f96af9e52407831a3b68"}, + {file = "virtualenv-20.23.0-py3-none-any.whl", hash = "sha256:6abec7670e5802a528357fdc75b26b9f57d5d92f29c5462ba0fbe45feacc685e"}, + {file = "virtualenv-20.23.0.tar.gz", hash = "sha256:a85caa554ced0c0afbd0d638e7e2d7b5f92d23478d05d17a76daeac8f279f924"}, ] [package.dependencies] distlib = ">=0.3.6,<1" -filelock = ">=3.4.1,<4" -platformdirs = ">=2.4,<4" +filelock = ">=3.11,<4" +platformdirs = ">=3.2,<4" [package.extras] -docs = ["furo (>=2022.12.7)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=22.12)"] -test = ["covdefaults (>=2.2.2)", "coverage (>=7.1)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23)", "pytest (>=7.2.1)", "pytest-env (>=0.8.1)", "pytest-freezegun (>=0.4.2)", "pytest-mock (>=3.10)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)"] +docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=22.12)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.3.1)", "pytest-env (>=0.8.1)", "pytest-freezegun (>=0.4.2)", "pytest-mock (>=3.10)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=67.7.1)", "time-machine (>=2.9)"] [[package]] name = "wcwidth" @@ -4319,21 +4255,21 @@ test = ["websockets"] [[package]] name = "werkzeug" -version = "2.2.3" +version = "2.3.4" description = "The comprehensive WSGI web application library." 
category = "main" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "Werkzeug-2.2.3-py3-none-any.whl", hash = "sha256:56433961bc1f12533306c624f3be5e744389ac61d722175d543e1751285da612"}, - {file = "Werkzeug-2.2.3.tar.gz", hash = "sha256:2e1ccc9417d4da358b9de6f174e3ac094391ea1d4fbef2d667865d819dfd0afe"}, + {file = "Werkzeug-2.3.4-py3-none-any.whl", hash = "sha256:48e5e61472fee0ddee27ebad085614ebedb7af41e88f687aaf881afb723a162f"}, + {file = "Werkzeug-2.3.4.tar.gz", hash = "sha256:1d5a58e0377d1fe39d061a5de4469e414e78ccb1e1e59c0f5ad6fa1c36c52b76"}, ] [package.dependencies] MarkupSafe = ">=2.1.1" [package.extras] -watchdog = ["watchdog"] +watchdog = ["watchdog (>=2.3)"] [[package]] name = "widgetsnbextension" @@ -4384,190 +4320,94 @@ files = [ {file = "xyzservices-2023.2.0.tar.gz", hash = "sha256:3342bba410d7941290eed0e58a2e5aadb0f7b97863ec4283b283c406ee723a28"}, ] -[[package]] -name = "y-py" -version = "0.5.9" -description = "Python bindings for the Y-CRDT built from yrs (Rust)" -category = "dev" -optional = false -python-versions = "*" -files = [ - {file = "y_py-0.5.9-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:afa9a11aa2880dd8689894f3269b653e6d3bd1956963d5329be9a5bf021dab62"}, - {file = "y_py-0.5.9-cp310-cp310-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:e370ce076781adea161b04d2f666e8b4f89bc7e8927ef842fbb0283d3bfa73e0"}, - {file = "y_py-0.5.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b67dad339f9b6701f74ff7a6e901c7909eca4eea02cf955b28d87a42650bd1be"}, - {file = "y_py-0.5.9-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ae82a6d9cbaff8cb7505e81b5b7f9cd7756bb7e7110aef7914375fe56b012a90"}, - {file = "y_py-0.5.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c7ca64a2a97f708569dcabd55865915943e30267bf6d26c4d212d005951efe62"}, - {file = "y_py-0.5.9-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55098440e32339c2dc3d652fb36bb77a4927dee5fd4ab0cb1fe12fdd163fd4f5"}, - {file = "y_py-0.5.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9052a814e8b7ec756371a191f38de68b956437e0bb429c2dd503e658f298f9"}, - {file = "y_py-0.5.9-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:95d13b38c9055d607565b77cbae12e2bf0c1671c5cb8f2ee2e1230d41d2d6d34"}, - {file = "y_py-0.5.9-cp310-none-win32.whl", hash = "sha256:5dbd8d177ec7b9fef4a7b6d22eb2f8d5606fd5aac31cf2eab0dc18f0b3504c7c"}, - {file = "y_py-0.5.9-cp310-none-win_amd64.whl", hash = "sha256:d373c6bb8e21d5f7ec0833b76fa1ab480086ada602ef5bbf4724a25a21a00b6a"}, - {file = "y_py-0.5.9-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:f8f238144a302f17eb26b122cad9382fcff5ec6653b8a562130b9a5e44010098"}, - {file = "y_py-0.5.9-cp311-cp311-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:25637e3d011ca6f877a24f3083ff2549d1d619406d7e8a1455c445527205046c"}, - {file = "y_py-0.5.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ffebe5e62cbfee6e24593927dedba77dc13ac4cfb9c822074ab566b1fb63d59"}, - {file = "y_py-0.5.9-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b0ed760e6aa5316227a0ba2d5d29634a4ef2d72c8bc55169ac01664e17e4b536"}, - {file = "y_py-0.5.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91be189fae8ba242528333e266e38d65cae3d9a09fe45867fab8578a3ddf2ea2"}, - {file = 
"y_py-0.5.9-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3ae6d22b7cc599220a26b06da6ead9fd582eea5fdb6273b06fa3f060d0a26a7"}, - {file = "y_py-0.5.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:065f90501cf008375d70be6ce72dd41745e09d088f0b545f5f914d2c3f04f7ae"}, - {file = "y_py-0.5.9-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:742c486d5b792c4ad76e09426161302edddca85efe826fa01dcee50907326cd7"}, - {file = "y_py-0.5.9-cp311-none-win32.whl", hash = "sha256:2692c808bf28f797f8d693f45dc86563ac3b1626579f67ce9546dca69644d687"}, - {file = "y_py-0.5.9-cp311-none-win_amd64.whl", hash = "sha256:c1f5f287cc7ae127ed6a2fb1546e631b316a41d087d7d2db9caa3e5f59906dcf"}, - {file = "y_py-0.5.9-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:9a59603cf42c20d02ee5add2e3d0ce48e89c480a2a02f642fb77f142c4f37958"}, - {file = "y_py-0.5.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b44473bb32217c78e18db66f497f6c8be33e339bab5f52398bb2468c904d5140"}, - {file = "y_py-0.5.9-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1906f13e8d5ebfbd9c7948f57bc6f6f53b451b19c99350f42a0f648147a8acfe"}, - {file = "y_py-0.5.9-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:202b2a3e42e0a1eaedee26f8a3bc73cd9f994c4c2b15511ea56b9838178eb380"}, - {file = "y_py-0.5.9-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13b9d2959d9a26536b6ad118fb026ff19bd79da52e4addf6f3a562e7c01d516e"}, - {file = "y_py-0.5.9-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff3ddedaa95284f4f22a92b362f658f3d92f272d8c0fa009051bd5490c4d5a04"}, - {file = "y_py-0.5.9-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:85585e669d7679126e4a04e4bc0a063a641175a74eecfe47539e8da3e5b1da6e"}, - {file = "y_py-0.5.9-cp37-none-win32.whl", hash = "sha256:caf9b1feb69379d424a1d3d7c899b8e0389a3fb3131d39c3c03dcc3d4a93dbdc"}, - {file = "y_py-0.5.9-cp37-none-win_amd64.whl", hash = "sha256:7353af0e9c1f42fbf0ab340e253eeb333d58c890fa91d3eadb1b9adaf9336732"}, - {file = "y_py-0.5.9-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:ed0fd5265905cc7e23709479bc152d69f4972dec32fa322d20cb77f749707e78"}, - {file = "y_py-0.5.9-cp38-cp38-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:db1ac7f2d1862eb4c448cf76183399d555a63dbe2452bafecb1c2f691e36d687"}, - {file = "y_py-0.5.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa685f7e43ce490dfb1e392ac48f584b75cd21f05dc526c160d15308236ce8a0"}, - {file = "y_py-0.5.9-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c42f3a6cd20153925b00c49af855a3277989d411bb8ea849095be943ee160821"}, - {file = "y_py-0.5.9-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:753aaae817d658a1e9d271663439d8e83d9d8effa45590ecdcadc600c7cf77e3"}, - {file = "y_py-0.5.9-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc8e5f38842a4b043c9592bfa9a740147ddb8fac2d7a5b7bf6d52466c090ec23"}, - {file = "y_py-0.5.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecd3cb0d13ac92e7b9235d1024dba9af0788161246f12dcf1f635d634ccb206a"}, - {file = "y_py-0.5.9-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9983e99e3a61452b39ffce98206c7e4c6d260f4e917c8fe53fb54aaf25df89a3"}, - {file = "y_py-0.5.9-cp38-none-win32.whl", hash = "sha256:63ef8e5b76cd54578a7fd5f72d8c698d9ccd7c555c7900ebfd38a24d397c3b15"}, - {file 
= "y_py-0.5.9-cp38-none-win_amd64.whl", hash = "sha256:fe70d0134fe2115c08866f0cac0eb5c0788093872b5026eb438a74e1ebafd659"}, - {file = "y_py-0.5.9-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:05f805b58422d5d7c8e7e8e2141d1c3cac4daaa4557ae6a9b84b141fe8d6289e"}, - {file = "y_py-0.5.9-cp39-cp39-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:a7977eeaceaeb0dfffcc5643c985c337ebc33a0b1d792ae0a9b1331cdd97366f"}, - {file = "y_py-0.5.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:800e73d2110b97a74c52db2c8ce03a78e96f0d66a7e0c87d8254170a67c2db0e"}, - {file = "y_py-0.5.9-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:add793f5f5c7c7a3eb1b09ffc771bdaae10a0bd482a370bf696b83f8dee8d1b4"}, - {file = "y_py-0.5.9-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8b67ae37af8aac6160fda66c0f73bcdf65c06da9022eb76192c3fc45cfab994"}, - {file = "y_py-0.5.9-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2532ea5aefb223fd688c93860199d348a7601d814aac9e8784d816314588ddeb"}, - {file = "y_py-0.5.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df78a0409dca11554a4b6442d7a8e61f762c3cfc78d55d98352392869a6b9ae0"}, - {file = "y_py-0.5.9-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d2da2a9e28dceab4832945a745cad507579f52b4d0c9e2f54ae156eb56875861"}, - {file = "y_py-0.5.9-cp39-none-win32.whl", hash = "sha256:fdafb93bfd5532b13a53c4090675bcd31724160017ecc73e492dc1211bc0377a"}, - {file = "y_py-0.5.9-cp39-none-win_amd64.whl", hash = "sha256:73200c59bb253b880825466717941ac57267f2f685b053e183183cb6fe82874d"}, - {file = "y_py-0.5.9-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:af6df5ec1d66ee2d962026635d60e84ad35fc01b2a1e36b993360c0ce60ae349"}, - {file = "y_py-0.5.9-pp38-pypy38_pp73-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:0c0e333c20b0a6ce4a5851203d45898ab93f16426c342420b931e190c5b71d3d"}, - {file = "y_py-0.5.9-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7434c77cd23592973ed63341b8d337e6aebaba5ed40d7f22e2d43dfd0c3a56e"}, - {file = "y_py-0.5.9-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e30fe2491d095c6d695a2c96257967fd3e2497f0f777030c8492d03c18d46e2a"}, - {file = "y_py-0.5.9-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a57d81260e048caacf43a2f851766687f53e8a8356df6947fb0eee7336a7e2de"}, - {file = "y_py-0.5.9-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d4dfc276f988175baaa4ab321c3321a16ce33db3356c9bc5f4dea0db3de55aa"}, - {file = "y_py-0.5.9-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb68445414940efe547291340e91604c7b8379b60822678ef29f4fc2a0e11c62"}, - {file = "y_py-0.5.9-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cd6f373dbf592ad83aaf95c16abebc8678928e49bd509ebd593259e1908345ae"}, - {file = "y_py-0.5.9-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:76b3480e7037ac9390c450e2aff9e46e2c9e61520c0d88afe228110ec728adc5"}, - {file = "y_py-0.5.9-pp39-pypy39_pp73-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:9484a3fc33f812234e58a5ee834b42bb0a628054d61b5c06c323aa56c12e557d"}, - {file = "y_py-0.5.9-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6d87d0c2e87990bc00c049742d36a5dbbb1510949459af17198728890ee748a"}, - {file = 
"y_py-0.5.9-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fce5feb57f6231376eb10d1fb68c60da106ffa0b520b3129471c466eff0304cc"}, - {file = "y_py-0.5.9-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:27c1e9a866146d250e9e16d99fe22a40c82f5b592ab85da97e5679fc3841c7ce"}, - {file = "y_py-0.5.9-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d722d6a27230c1f395535da5cee6a9a16497c6343afd262c846090075c083009"}, - {file = "y_py-0.5.9-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f54625b9ed4e787872c45d3044dcfd04c0da4258d9914f3d32308830b35246c"}, - {file = "y_py-0.5.9-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9513ae81fcc805671ae134c4c7421ca322acf92ce8b33817e1775ea8c0176973"}, - {file = "y_py-0.5.9.tar.gz", hash = "sha256:50cfa0532bcee27edb8c64743b49570e28bb76a00cd384ead1d84b6f052d9368"}, -] - [[package]] name = "yarl" -version = "1.8.2" +version = "1.9.2" description = "Yet another URL library" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "yarl-1.8.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bb81f753c815f6b8e2ddd2eef3c855cf7da193b82396ac013c661aaa6cc6b0a5"}, - {file = "yarl-1.8.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:47d49ac96156f0928f002e2424299b2c91d9db73e08c4cd6742923a086f1c863"}, - {file = "yarl-1.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3fc056e35fa6fba63248d93ff6e672c096f95f7836938241ebc8260e062832fe"}, - {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58a3c13d1c3005dbbac5c9f0d3210b60220a65a999b1833aa46bd6677c69b08e"}, - {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10b08293cda921157f1e7c2790999d903b3fd28cd5c208cf8826b3b508026996"}, - {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de986979bbd87272fe557e0a8fcb66fd40ae2ddfe28a8b1ce4eae22681728fef"}, - {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c4fcfa71e2c6a3cb568cf81aadc12768b9995323186a10827beccf5fa23d4f8"}, - {file = "yarl-1.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae4d7ff1049f36accde9e1ef7301912a751e5bae0a9d142459646114c70ecba6"}, - {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bf071f797aec5b96abfc735ab97da9fd8f8768b43ce2abd85356a3127909d146"}, - {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:74dece2bfc60f0f70907c34b857ee98f2c6dd0f75185db133770cd67300d505f"}, - {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:df60a94d332158b444301c7f569659c926168e4d4aad2cfbf4bce0e8fb8be826"}, - {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:63243b21c6e28ec2375f932a10ce7eda65139b5b854c0f6b82ed945ba526bff3"}, - {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cfa2bbca929aa742b5084fd4663dd4b87c191c844326fcb21c3afd2d11497f80"}, - {file = "yarl-1.8.2-cp310-cp310-win32.whl", hash = "sha256:b05df9ea7496df11b710081bd90ecc3a3db6adb4fee36f6a411e7bc91a18aa42"}, - {file = "yarl-1.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:24ad1d10c9db1953291f56b5fe76203977f1ed05f82d09ec97acb623a7976574"}, - {file = "yarl-1.8.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2a1fca9588f360036242f379bfea2b8b44cae2721859b1c56d033adfd5893634"}, - 
{file = "yarl-1.8.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f37db05c6051eff17bc832914fe46869f8849de5b92dc4a3466cd63095d23dfd"}, - {file = "yarl-1.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:77e913b846a6b9c5f767b14dc1e759e5aff05502fe73079f6f4176359d832581"}, - {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0978f29222e649c351b173da2b9b4665ad1feb8d1daa9d971eb90df08702668a"}, - {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:388a45dc77198b2460eac0aca1efd6a7c09e976ee768b0d5109173e521a19daf"}, - {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2305517e332a862ef75be8fad3606ea10108662bc6fe08509d5ca99503ac2aee"}, - {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42430ff511571940d51e75cf42f1e4dbdded477e71c1b7a17f4da76c1da8ea76"}, - {file = "yarl-1.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3150078118f62371375e1e69b13b48288e44f6691c1069340081c3fd12c94d5b"}, - {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c15163b6125db87c8f53c98baa5e785782078fbd2dbeaa04c6141935eb6dab7a"}, - {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4d04acba75c72e6eb90745447d69f84e6c9056390f7a9724605ca9c56b4afcc6"}, - {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e7fd20d6576c10306dea2d6a5765f46f0ac5d6f53436217913e952d19237efc4"}, - {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:75c16b2a900b3536dfc7014905a128a2bea8fb01f9ee26d2d7d8db0a08e7cb2c"}, - {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6d88056a04860a98341a0cf53e950e3ac9f4e51d1b6f61a53b0609df342cc8b2"}, - {file = "yarl-1.8.2-cp311-cp311-win32.whl", hash = "sha256:fb742dcdd5eec9f26b61224c23baea46c9055cf16f62475e11b9b15dfd5c117b"}, - {file = "yarl-1.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:8c46d3d89902c393a1d1e243ac847e0442d0196bbd81aecc94fcebbc2fd5857c"}, - {file = "yarl-1.8.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ceff9722e0df2e0a9e8a79c610842004fa54e5b309fe6d218e47cd52f791d7ef"}, - {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f6b4aca43b602ba0f1459de647af954769919c4714706be36af670a5f44c9c1"}, - {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1684a9bd9077e922300ecd48003ddae7a7474e0412bea38d4631443a91d61077"}, - {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ebb78745273e51b9832ef90c0898501006670d6e059f2cdb0e999494eb1450c2"}, - {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3adeef150d528ded2a8e734ebf9ae2e658f4c49bf413f5f157a470e17a4a2e89"}, - {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57a7c87927a468e5a1dc60c17caf9597161d66457a34273ab1760219953f7f4c"}, - {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:efff27bd8cbe1f9bd127e7894942ccc20c857aa8b5a0327874f30201e5ce83d0"}, - {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a783cd344113cb88c5ff7ca32f1f16532a6f2142185147822187913eb989f739"}, - {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:705227dccbe96ab02c7cb2c43e1228e2826e7ead880bb19ec94ef279e9555b5b"}, - 
{file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:34c09b43bd538bf6c4b891ecce94b6fa4f1f10663a8d4ca589a079a5018f6ed7"}, - {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a48f4f7fea9a51098b02209d90297ac324241bf37ff6be6d2b0149ab2bd51b37"}, - {file = "yarl-1.8.2-cp37-cp37m-win32.whl", hash = "sha256:0414fd91ce0b763d4eadb4456795b307a71524dbacd015c657bb2a39db2eab89"}, - {file = "yarl-1.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:d881d152ae0007809c2c02e22aa534e702f12071e6b285e90945aa3c376463c5"}, - {file = "yarl-1.8.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5df5e3d04101c1e5c3b1d69710b0574171cc02fddc4b23d1b2813e75f35a30b1"}, - {file = "yarl-1.8.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7a66c506ec67eb3159eea5096acd05f5e788ceec7b96087d30c7d2865a243918"}, - {file = "yarl-1.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2b4fa2606adf392051d990c3b3877d768771adc3faf2e117b9de7eb977741229"}, - {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e21fb44e1eff06dd6ef971d4bdc611807d6bd3691223d9c01a18cec3677939e"}, - {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93202666046d9edadfe9f2e7bf5e0782ea0d497b6d63da322e541665d65a044e"}, - {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc77086ce244453e074e445104f0ecb27530d6fd3a46698e33f6c38951d5a0f1"}, - {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dd68a92cab699a233641f5929a40f02a4ede8c009068ca8aa1fe87b8c20ae3"}, - {file = "yarl-1.8.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b372aad2b5f81db66ee7ec085cbad72c4da660d994e8e590c997e9b01e44901"}, - {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e6f3515aafe0209dd17fb9bdd3b4e892963370b3de781f53e1746a521fb39fc0"}, - {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dfef7350ee369197106805e193d420b75467b6cceac646ea5ed3049fcc950a05"}, - {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:728be34f70a190566d20aa13dc1f01dc44b6aa74580e10a3fb159691bc76909d"}, - {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ff205b58dc2929191f68162633d5e10e8044398d7a45265f90a0f1d51f85f72c"}, - {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:baf211dcad448a87a0d9047dc8282d7de59473ade7d7fdf22150b1d23859f946"}, - {file = "yarl-1.8.2-cp38-cp38-win32.whl", hash = "sha256:272b4f1599f1b621bf2aabe4e5b54f39a933971f4e7c9aa311d6d7dc06965165"}, - {file = "yarl-1.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:326dd1d3caf910cd26a26ccbfb84c03b608ba32499b5d6eeb09252c920bcbe4f"}, - {file = "yarl-1.8.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f8ca8ad414c85bbc50f49c0a106f951613dfa5f948ab69c10ce9b128d368baf8"}, - {file = "yarl-1.8.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:418857f837347e8aaef682679f41e36c24250097f9e2f315d39bae3a99a34cbf"}, - {file = "yarl-1.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae0eec05ab49e91a78700761777f284c2df119376e391db42c38ab46fd662b77"}, - {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:009a028127e0a1755c38b03244c0bea9d5565630db9c4cf9572496e947137a87"}, - {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3edac5d74bb3209c418805bda77f973117836e1de7c000e9755e572c1f7850d0"}, - {file = 
"yarl-1.8.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da65c3f263729e47351261351b8679c6429151ef9649bba08ef2528ff2c423b2"}, - {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ef8fb25e52663a1c85d608f6dd72e19bd390e2ecaf29c17fb08f730226e3a08"}, - {file = "yarl-1.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcd7bb1e5c45274af9a1dd7494d3c52b2be5e6bd8d7e49c612705fd45420b12d"}, - {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:44ceac0450e648de86da8e42674f9b7077d763ea80c8ceb9d1c3e41f0f0a9951"}, - {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:97209cc91189b48e7cfe777237c04af8e7cc51eb369004e061809bcdf4e55220"}, - {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:48dd18adcf98ea9cd721a25313aef49d70d413a999d7d89df44f469edfb38a06"}, - {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e59399dda559688461762800d7fb34d9e8a6a7444fd76ec33220a926c8be1516"}, - {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d617c241c8c3ad5c4e78a08429fa49e4b04bedfc507b34b4d8dceb83b4af3588"}, - {file = "yarl-1.8.2-cp39-cp39-win32.whl", hash = "sha256:cb6d48d80a41f68de41212f3dfd1a9d9898d7841c8f7ce6696cf2fd9cb57ef83"}, - {file = "yarl-1.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:6604711362f2dbf7160df21c416f81fac0de6dbcf0b5445a2ef25478ecc4c778"}, - {file = "yarl-1.8.2.tar.gz", hash = "sha256:49d43402c6e3013ad0978602bf6bf5328535c48d192304b91b97a3c6790b1562"}, + {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8c2ad583743d16ddbdf6bb14b5cd76bf43b0d0006e918809d5d4ddf7bde8dd82"}, + {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:82aa6264b36c50acfb2424ad5ca537a2060ab6de158a5bd2a72a032cc75b9eb8"}, + {file = "yarl-1.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c0c77533b5ed4bcc38e943178ccae29b9bcf48ffd1063f5821192f23a1bd27b9"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee4afac41415d52d53a9833ebae7e32b344be72835bbb589018c9e938045a560"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9bf345c3a4f5ba7f766430f97f9cc1320786f19584acc7086491f45524a551ac"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a96c19c52ff442a808c105901d0bdfd2e28575b3d5f82e2f5fd67e20dc5f4ea"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:891c0e3ec5ec881541f6c5113d8df0315ce5440e244a716b95f2525b7b9f3608"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c3a53ba34a636a256d767c086ceb111358876e1fb6b50dfc4d3f4951d40133d5"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:566185e8ebc0898b11f8026447eacd02e46226716229cea8db37496c8cdd26e0"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:2b0738fb871812722a0ac2154be1f049c6223b9f6f22eec352996b69775b36d4"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:32f1d071b3f362c80f1a7d322bfd7b2d11e33d2adf395cc1dd4df36c9c243095"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:e9fdc7ac0d42bc3ea78818557fab03af6181e076a2944f43c38684b4b6bed8e3"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:56ff08ab5df8429901ebdc5d15941b59f6253393cb5da07b4170beefcf1b2528"}, + {file = "yarl-1.9.2-cp310-cp310-win32.whl", hash = "sha256:8ea48e0a2f931064469bdabca50c2f578b565fc446f302a79ba6cc0ee7f384d3"}, + {file = "yarl-1.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:50f33040f3836e912ed16d212f6cc1efb3231a8a60526a407aeb66c1c1956dde"}, + {file = "yarl-1.9.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:646d663eb2232d7909e6601f1a9107e66f9791f290a1b3dc7057818fe44fc2b6"}, + {file = "yarl-1.9.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aff634b15beff8902d1f918012fc2a42e0dbae6f469fce134c8a0dc51ca423bb"}, + {file = "yarl-1.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a83503934c6273806aed765035716216cc9ab4e0364f7f066227e1aaea90b8d0"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b25322201585c69abc7b0e89e72790469f7dad90d26754717f3310bfe30331c2"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:22a94666751778629f1ec4280b08eb11815783c63f52092a5953faf73be24191"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ec53a0ea2a80c5cd1ab397925f94bff59222aa3cf9c6da938ce05c9ec20428d"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:159d81f22d7a43e6eabc36d7194cb53f2f15f498dbbfa8edc8a3239350f59fe7"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:832b7e711027c114d79dffb92576acd1bd2decc467dec60e1cac96912602d0e6"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:95d2ecefbcf4e744ea952d073c6922e72ee650ffc79028eb1e320e732898d7e8"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d4e2c6d555e77b37288eaf45b8f60f0737c9efa3452c6c44626a5455aeb250b9"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:783185c75c12a017cc345015ea359cc801c3b29a2966c2655cd12b233bf5a2be"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:b8cc1863402472f16c600e3e93d542b7e7542a540f95c30afd472e8e549fc3f7"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:822b30a0f22e588b32d3120f6d41e4ed021806418b4c9f0bc3048b8c8cb3f92a"}, + {file = "yarl-1.9.2-cp311-cp311-win32.whl", hash = "sha256:a60347f234c2212a9f0361955007fcf4033a75bf600a33c88a0a8e91af77c0e8"}, + {file = "yarl-1.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:be6b3fdec5c62f2a67cb3f8c6dbf56bbf3f61c0f046f84645cd1ca73532ea051"}, + {file = "yarl-1.9.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:38a3928ae37558bc1b559f67410df446d1fbfa87318b124bf5032c31e3447b74"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac9bb4c5ce3975aeac288cfcb5061ce60e0d14d92209e780c93954076c7c4367"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3da8a678ca8b96c8606bbb8bfacd99a12ad5dd288bc6f7979baddd62f71c63ef"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13414591ff516e04fcdee8dc051c13fd3db13b673c7a4cb1350e6b2ad9639ad3"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf74d08542c3a9ea97bb8f343d4fcbd4d8f91bba5ec9d5d7f792dbe727f88938"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:6e7221580dc1db478464cfeef9b03b95c5852cc22894e418562997df0d074ccc"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:494053246b119b041960ddcd20fd76224149cfea8ed8777b687358727911dd33"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:52a25809fcbecfc63ac9ba0c0fb586f90837f5425edfd1ec9f3372b119585e45"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:e65610c5792870d45d7b68c677681376fcf9cc1c289f23e8e8b39c1485384185"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:1b1bba902cba32cdec51fca038fd53f8beee88b77efc373968d1ed021024cc04"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:662e6016409828ee910f5d9602a2729a8a57d74b163c89a837de3fea050c7582"}, + {file = "yarl-1.9.2-cp37-cp37m-win32.whl", hash = "sha256:f364d3480bffd3aa566e886587eaca7c8c04d74f6e8933f3f2c996b7f09bee1b"}, + {file = "yarl-1.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6a5883464143ab3ae9ba68daae8e7c5c95b969462bbe42e2464d60e7e2698368"}, + {file = "yarl-1.9.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5610f80cf43b6202e2c33ba3ec2ee0a2884f8f423c8f4f62906731d876ef4fac"}, + {file = "yarl-1.9.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b9a4e67ad7b646cd6f0938c7ebfd60e481b7410f574c560e455e938d2da8e0f4"}, + {file = "yarl-1.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:83fcc480d7549ccebe9415d96d9263e2d4226798c37ebd18c930fce43dfb9574"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fcd436ea16fee7d4207c045b1e340020e58a2597301cfbcfdbe5abd2356c2fb"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84e0b1599334b1e1478db01b756e55937d4614f8654311eb26012091be109d59"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3458a24e4ea3fd8930e934c129b676c27452e4ebda80fbe47b56d8c6c7a63a9e"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:838162460b3a08987546e881a2bfa573960bb559dfa739e7800ceeec92e64417"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f4e2d08f07a3d7d3e12549052eb5ad3eab1c349c53ac51c209a0e5991bbada78"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:de119f56f3c5f0e2fb4dee508531a32b069a5f2c6e827b272d1e0ff5ac040333"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:149ddea5abf329752ea5051b61bd6c1d979e13fbf122d3a1f9f0c8be6cb6f63c"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:674ca19cbee4a82c9f54e0d1eee28116e63bc6fd1e96c43031d11cbab8b2afd5"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:9b3152f2f5677b997ae6c804b73da05a39daa6a9e85a512e0e6823d81cdad7cc"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5415d5a4b080dc9612b1b63cba008db84e908b95848369aa1da3686ae27b6d2b"}, + {file = "yarl-1.9.2-cp38-cp38-win32.whl", hash = "sha256:f7a3d8146575e08c29ed1cd287068e6d02f1c7bdff8970db96683b9591b86ee7"}, + {file = "yarl-1.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:63c48f6cef34e6319a74c727376e95626f84ea091f92c0250a98e53e62c77c72"}, + {file = "yarl-1.9.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:75df5ef94c3fdc393c6b19d80e6ef1ecc9ae2f4263c09cacb178d871c02a5ba9"}, + {file = "yarl-1.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:c027a6e96ef77d401d8d5a5c8d6bc478e8042f1e448272e8d9752cb0aff8b5c8"}, + {file = "yarl-1.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3b078dbe227f79be488ffcfc7a9edb3409d018e0952cf13f15fd6512847f3f7"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59723a029760079b7d991a401386390c4be5bfec1e7dd83e25a6a0881859e716"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b03917871bf859a81ccb180c9a2e6c1e04d2f6a51d953e6a5cdd70c93d4e5a2a"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c1012fa63eb6c032f3ce5d2171c267992ae0c00b9e164efe4d73db818465fac3"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a74dcbfe780e62f4b5a062714576f16c2f3493a0394e555ab141bf0d746bb955"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8c56986609b057b4839968ba901944af91b8e92f1725d1a2d77cbac6972b9ed1"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2c315df3293cd521033533d242d15eab26583360b58f7ee5d9565f15fee1bef4"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:b7232f8dfbd225d57340e441d8caf8652a6acd06b389ea2d3222b8bc89cbfca6"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:53338749febd28935d55b41bf0bcc79d634881195a39f6b2f767870b72514caf"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:066c163aec9d3d073dc9ffe5dd3ad05069bcb03fcaab8d221290ba99f9f69ee3"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8288d7cd28f8119b07dd49b7230d6b4562f9b61ee9a4ab02221060d21136be80"}, + {file = "yarl-1.9.2-cp39-cp39-win32.whl", hash = "sha256:b124e2a6d223b65ba8768d5706d103280914d61f5cae3afbc50fc3dfcc016623"}, + {file = "yarl-1.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:61016e7d582bc46a5378ffdd02cd0314fb8ba52f40f9cf4d9a5e7dbef88dee18"}, + {file = "yarl-1.9.2.tar.gz", hash = "sha256:04ab9d4b9f587c06d801c2abfe9317b77cdf996c65a90d5e84ecc45010823571"}, ] [package.dependencies] idna = ">=2.0" multidict = ">=4.0" -[[package]] -name = "ypy-websocket" -version = "0.8.2" -description = "WebSocket connector for Ypy" -category = "dev" -optional = false -python-versions = ">=3.7" -files = [ - {file = "ypy_websocket-0.8.2-py3-none-any.whl", hash = "sha256:9049d5a7d61c26c2b5a39757c9ffcbe2274bf3553adeea8de7fe1c04671d4145"}, - {file = "ypy_websocket-0.8.2.tar.gz", hash = "sha256:491b2cc4271df4dde9be83017c15f4532b597dc43148472eb20c5aeb838a5b46"}, -] - -[package.dependencies] -aiofiles = ">=22.1.0,<23" -aiosqlite = ">=0.17.0,<1" -y-py = ">=0.5.3,<0.6.0" - -[package.extras] -test = ["mypy", "pre-commit", "pytest", "pytest-asyncio", "websockets (>=10.0)"] - [[package]] name = "zarr" version = "2.14.2" @@ -4608,4 +4448,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.11" -content-hash = "0e5fb627ce20a5232efbd34634b80274ab4b98954e61747d4c05c7adbb8621c2" +content-hash = "b891bc64ddb783e50987e7248a57361b7ca649212301b8de6faec3e0fd24aea3" diff --git a/pyproject.toml b/pyproject.toml index 94460da7d..aaa5d3d2d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ dash-cytoscape = "*" jupyter-dash = "*" numpydoc_decorator = ">=2.0.0" typing_extensions = "*" +typeguard = ">=4.0.0" [tool.poetry.dev-dependencies] pytest = "*" diff --git 
a/tests/anoph/conftest.py b/tests/anoph/conftest.py index 61ce398ca..126b35167 100644 --- a/tests/anoph/conftest.py +++ b/tests/anoph/conftest.py @@ -33,20 +33,19 @@ def fixture_dir(): return cwd / "fixture" -def simulate_contig(*, low, high): +def simulate_contig(*, low, high, base_composition): size = np.random.randint(low=low, high=high) - seq = np.random.choice( - [b"A", b"C", b"G", b"T", b"N", b"a", b"c", b"g", b"t", b"n"], - size=size, - ) + bases = np.array([b"a", b"c", b"g", b"t", b"n", b"A", b"C", b"G", b"T", b"N"]) + p = np.array([base_composition[b] for b in bases]) + seq = np.random.choice(bases, size=size, replace=True, p=p) return seq -def simulate_genome(*, path, contigs, low, high): +def simulate_genome(*, path, contigs, low, high, base_composition): path.mkdir(parents=True, exist_ok=True) root = zarr.open(path, mode="w") for contig in contigs: - seq = simulate_contig(low=low, high=high) + seq = simulate_contig(low=low, high=high, base_composition=base_composition) root.create_dataset(name=contig, data=seq) zarr.consolidate_metadata(path) return root @@ -315,18 +314,203 @@ def simulate_exons( yield feature +def simulate_snp_sites(path, contigs, genome): + root = zarr.open(path, mode="w") + n_sites = dict() + + for contig in contigs: + # Obtain variants group. + variants = root.require_group(contig).require_group("variants") + + # Simulate POS. + seq = genome[contig][:] + loc_n = (seq == b"N") | (seq == b"n") + pos = np.nonzero(~loc_n)[0] + 1 # 1-based coordinates + variants.create_dataset(name="POS", data=pos.astype("i4")) + + # Simulate REF. + ref = np.char.upper(seq[~loc_n]) # ensure upper case + assert pos.shape == ref.shape + variants.create_dataset(name="REF", data=ref) + + # Simulate ALT. + alt = np.empty(shape=(ref.shape[0], 3), dtype="S1") + alt[ref == b"A"] = np.array([b"C", b"T", b"G"]) + alt[ref == b"C"] = np.array([b"A", b"T", b"G"]) + alt[ref == b"T"] = np.array([b"A", b"C", b"G"]) + alt[ref == b"G"] = np.array([b"A", b"C", b"T"]) + variants.create_dataset(name="ALT", data=alt) + + # Store number of sites for later. + n_sites[contig] = pos.shape[0] + + zarr.consolidate_metadata(path) + return n_sites + + +def simulate_site_filters(path, contigs, p_pass, n_sites): + root = zarr.open(path, mode="w") + p = np.array([1 - p_pass, p_pass]) + for contig in contigs: + variants = root.require_group(contig).require_group("variants") + size = n_sites[contig] + filter_pass = np.random.choice([False, True], size=size, p=p) + variants.create_dataset(name="filter_pass", data=filter_pass) + zarr.consolidate_metadata(path) + + +def simulate_snp_genotypes( + zarr_path, metadata_path, contigs, n_sites, p_allele, p_missing +): + root = zarr.open(zarr_path, mode="w") + + # Create samples array. + df_samples = pd.read_csv(metadata_path) + n_samples = len(df_samples) + samples = df_samples["sample_id"].values.astype("S") + root.create_dataset(name="samples", data=samples) + + for contig in contigs: + # Set up groups. + contig_grp = root.require_group(contig) + calldata = contig_grp.require_group("calldata") + contig_n_sites = n_sites[contig] + + # Simulate genotype calls. + gt = np.random.choice( + np.arange(4, dtype="i1"), + size=(contig_n_sites, n_samples, 2), + replace=True, + p=p_allele, + ) + + # Simulate missing calls. + n_calls = contig_n_sites * n_samples + loc_missing = np.random.choice( + [False, True], size=n_calls, replace=True, p=p_missing + ) + gt.reshape(-1, 2)[loc_missing] = -1 + + # Store genotype calls. 
+ # N.B., we need to chunk across the final dimension here, + # otherwise allele count computation breaks inside scikit-allel. + gt_chunks = (contig_n_sites // 5, n_samples // 3, None) + calldata.create_dataset(name="GT", data=gt, chunks=gt_chunks) + + # Create other arrays - these are never actually used currently + # so we'll create some empty arrays to avoid delaying the tests. + calldata.create_dataset( + name="GQ", shape=(contig_n_sites, n_samples), dtype="i1", fill_value=-1 + ) + calldata.create_dataset( + name="MQ", shape=(contig_n_sites, n_samples), dtype="f4", fill_value=-1 + ) + calldata.create_dataset( + name="AD", + shape=(contig_n_sites, n_samples, 4), + dtype="i2", + fill_value=-1, + ) + + zarr.consolidate_metadata(zarr_path) + + +def simulate_site_annotations(path, genome): + root = zarr.open(path, mode="w") + contigs = list(genome) + + # Take a very simple approach here to simulate random data. + # It won't be biologically realistic, but should hopefully + # suffice for testing purposes. + + # codon_degeneracy + grp = root.require_group("codon_degeneracy") + vals = np.arange(-1, 5) + p = [0.897754, 0.0, 0.060577, 0.014287, 0.011096, 0.016286] + for contig in contigs: + size = genome[contig].shape[0] + x = np.random.choice(vals, size=size, replace=True, p=p) + grp.create_dataset(name=contig, data=x) + + # codon_nonsyn + grp = root.require_group("codon_nonsyn") + vals = np.arange(4) + p = [0.91404, 0.001646, 0.018698, 0.065616] + for contig in contigs: + size = genome[contig].shape[0] + x = np.random.choice(vals, size=size, replace=True, p=p) + grp.create_dataset(name=contig, data=x) + + # codon_position + grp = root.require_group("codon_position") + vals = np.arange(4) + p = [0.897754, 0.034082, 0.034082, 0.034082] + for contig in contigs: + size = genome[contig].shape[0] + x = np.random.choice(vals, size=size, replace=True, p=p) + grp.create_dataset(name=contig, data=x) + + # seq_cls + grp = root.require_group("seq_cls") + vals = np.arange(11) + p = [ + 0.034824, + 0.230856, + 0.318803, + 0.009675, + 0.015201, + 0.015446, + 0.059981, + 0.018995, + 0.085244, + 0.180545, + 0.03043, + ] + for contig in contigs: + size = genome[contig].shape[0] + x = np.random.choice(vals, size=size, replace=True, p=p) + grp.create_dataset(name=contig, data=x) + + # seq_flen + grp = root.require_group("seq_flen") + for contig in contigs: + size = genome[contig].shape[0] + x = np.random.randint(low=0, high=40_000, size=size) + grp.create_dataset(name=contig, data=x) + + # seq_relpos_start + grp = root.require_group("seq_relpos_start") + for contig in contigs: + size = genome[contig].shape[0] + x = np.random.beta(a=0.4, b=4, size=size) * 40_000 + grp.create_dataset(name=contig, data=x) + + # seq_relpos_stop + grp = root.require_group("seq_relpos_stop") + for contig in contigs: + size = genome[contig].shape[0] + x = np.random.beta(a=0.4, b=4, size=size) * 40_000 + grp.create_dataset(name=contig, data=x) + + zarr.consolidate_metadata(path) + + class Ag3Simulator: def __init__(self, fixture_dir): self.fixture_dir = fixture_dir self.bucket = "vo_agam_release" - self.path = (self.fixture_dir / "simulated" / self.bucket).resolve() - self.url = self.path.as_uri() + self.bucket_path = (self.fixture_dir / "simulated" / self.bucket).resolve() + self.results_cache_path = ( + self.fixture_dir / "simulated" / "ag3_results_cache" + ).resolve() + self.url = self.bucket_path.as_uri() - # Clear out the fixture directory. - shutil.rmtree(self.path, ignore_errors=True) + # Clear out the fixture directories. 
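+        # N.B., clearing the results cache too ensures that cached results
+        # from a previous run cannot leak into the current test session.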
+ shutil.rmtree(self.bucket_path, ignore_errors=True) + shutil.rmtree(self.results_cache_path, ignore_errors=True) # Ensure the fixture directory exists. - self.path.mkdir(parents=True, exist_ok=True) + self.bucket_path.mkdir(parents=True, exist_ok=True) # Create fixture data. self.releases = ("3.0", "3.1") @@ -337,6 +521,10 @@ def __init__(self, fixture_dir): self.init_genome_sequence() self.init_genome_features() self.init_metadata() + self.init_snp_sites() + self.init_site_filters() + self.init_snp_genotypes() + self.init_site_annotations() def init_config(self): self.config = { @@ -355,7 +543,7 @@ def init_config(self): "SITE_MASK_IDS": ["gamb_colu_arab", "gamb_colu", "arab"], "PHASING_ANALYSIS_IDS": ["gamb_colu_arab", "gamb_colu", "arab"], } - config_path = self.path / "v3-config.json" + config_path = self.bucket_path / "v3-config.json" with config_path.open(mode="w") as f: json.dump(self.config, f, indent=4) @@ -367,13 +555,13 @@ def init_public_release_manifest(self): # Here we create a release manifest for an Ag3-style # public release. Note this is not the exact same data # as the real release. - release_path = self.path / "v3" + release_path = self.bucket_path / "v3" release_path.mkdir(parents=True, exist_ok=True) manifest_path = release_path / "manifest.tsv" manifest = pd.DataFrame( { "sample_set": ["AG1000G-AO", "AG1000G-BF-A"], - "sample_count": [randint(10, 81), randint(10, 100)], + "sample_count": [randint(10, 60), randint(10, 50)], } ) manifest.to_csv(manifest_path, index=False, sep="\t") @@ -383,7 +571,7 @@ def init_pre_release_manifest(self): # Here we create a release manifest for an Ag3-style # pre-release. Note this is not the exact same data # as the real release. - release_path = self.path / "v3.1" + release_path = self.bucket_path / "v3.1" release_path.mkdir(parents=True, exist_ok=True) manifest_path = release_path / "manifest.tsv" manifest = pd.DataFrame( @@ -391,7 +579,7 @@ def init_pre_release_manifest(self): "sample_set": [ "1177-VO-ML-LEHMANN-VMF00004", ], - "sample_count": [randint(10, 100)], + "sample_count": [randint(10, 70)], } ) manifest.to_csv(manifest_path, index=False, sep="\t") @@ -401,17 +589,34 @@ def init_genome_sequence(self): # Here we simulate a reference genome in a simple way # but with much smaller contigs. The data are stored # using zarr as with the real data releases. - # TODO Use accurate base composition. - path = self.path / self.config["GENOME_ZARR_PATH"] + + # Use real base composition. 
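+        # N.B., realistic proportions of n/N matter here, because
+        # simulate_snp_sites() only places SNP sites at non-N positions.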
+ base_composition = { + b"a": 0.042154199245128525, + b"c": 0.027760739796444212, + b"g": 0.027853725511269512, + b"t": 0.041827104954587246, + b"n": 0.028714045930701336, + b"A": 0.23177421009505061, + b"C": 0.1843981552034527, + b"G": 0.1840007377851694, + b"T": 0.23151655721224917, + b"N": 5.242659472466922e-07, + } + path = self.bucket_path / self.config["GENOME_ZARR_PATH"] self.genome = simulate_genome( - path=path, contigs=self.contigs, low=100_000, high=200_000 + path=path, + contigs=self.contigs, + low=100_000, + high=150_000, + base_composition=base_composition, ) self.contig_sizes = { contig: self.genome[contig].shape[0] for contig in self.contigs } def init_genome_features(self): - path = self.path / self.config["GENESET_GFF3_PATH"] + path = self.bucket_path / self.config["GENESET_GFF3_PATH"] path.parent.mkdir(parents=True, exist_ok=True) simulator = Gff3Simulator(contig_sizes=self.contig_sizes) self.genome_features = simulator.simulate_gff(path=path) @@ -440,7 +645,7 @@ def write_metadata(self, release, release_path, sample_set, aim=True, cohorts=Tr / "samples.meta.csv" ) dst_path = ( - self.path + self.bucket_path / release_path / "metadata" / "general" @@ -464,7 +669,7 @@ def write_metadata(self, release, release_path, sample_set, aim=True, cohorts=Tr / "samples.species_aim.csv" ) dst_path = ( - self.path + self.bucket_path / release_path / "metadata" / "species_calls_aim_20220528" @@ -488,7 +693,7 @@ def write_metadata(self, release, release_path, sample_set, aim=True, cohorts=Tr / "samples.cohorts.csv" ) dst_path = ( - self.path + self.bucket_path / release_path / "metadata" / "cohorts_20230223" @@ -511,7 +716,7 @@ def write_metadata(self, release, release_path, sample_set, aim=True, cohorts=Tr / "wgs_snp_data.csv" ) dst_path = ( - self.path + self.bucket_path / release_path / "metadata" / "general" @@ -536,19 +741,102 @@ def init_metadata(self): cohorts=False, ) + def init_snp_sites(self): + path = self.bucket_path / "v3/snp_genotypes/all/sites/" + self.n_sites = simulate_snp_sites( + path=path, contigs=self.contigs, genome=self.genome + ) + + def init_site_filters(self): + analysis = self.config["DEFAULT_SITE_FILTERS_ANALYSIS"] + + # Simulate the gamb_colu mask. + mask = "gamb_colu" + p_pass = 0.71 + path = self.bucket_path / "v3/site_filters" / analysis / mask + simulate_site_filters( + path=path, contigs=self.contigs, p_pass=p_pass, n_sites=self.n_sites + ) + + # Simulate the arab mask. + mask = "arab" + p_pass = 0.70 + path = self.bucket_path / "v3/site_filters" / analysis / mask + simulate_site_filters( + path=path, contigs=self.contigs, p_pass=p_pass, n_sites=self.n_sites + ) + + # Simulate the gamb_colu_arab mask. + mask = "gamb_colu_arab" + p_pass = 0.62 + path = self.bucket_path / "v3/site_filters" / analysis / mask + simulate_site_filters( + path=path, contigs=self.contigs, p_pass=p_pass, n_sites=self.n_sites + ) + + def init_snp_genotypes(self): + # Iterate over releases. + for release, manifest in self.release_manifests.items(): + # Determine release path. + if release == "3.0": + release_path = "v3" + else: + release_path = f"v{release}" + + # Iterate over sample sets in the release. + for rec in manifest.itertuples(): + sample_set = rec.sample_set + metadata_path = ( + self.bucket_path + / release_path + / "metadata" + / "general" + / sample_set + / "samples.meta.csv" + ) + + # Create zarr hierarchy. + zarr_path = ( + self.bucket_path + / release_path + / "snp_genotypes" + / "all" + / sample_set + ) + + # Simulate SNP genotype data. 
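+                # N.B., p_allele gives the sampling probability for each of the
+                # four alleles (0=ref, 1-3=alt), skewed heavily towards the
+                # reference; the second element of p_missing is the per-call
+                # probability of a missing (-1) genotype.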
+ p_allele = np.array([0.979, 0.007, 0.008, 0.006]) + p_missing = np.array([0.96, 0.04]) + simulate_snp_genotypes( + zarr_path=zarr_path, + metadata_path=metadata_path, + contigs=self.contigs, + n_sites=self.n_sites, + p_allele=p_allele, + p_missing=p_missing, + ) + + def init_site_annotations(self): + path = self.bucket_path / self.config["SITE_ANNOTATIONS_ZARR_PATH"] + simulate_site_annotations(path=path, genome=self.genome) + class Af1Simulator: def __init__(self, fixture_dir): self.fixture_dir = fixture_dir self.bucket = "vo_afun_release" - self.path = (self.fixture_dir / "simulated" / self.bucket).resolve() - self.url = self.path.as_uri() + self.bucket_path = (self.fixture_dir / "simulated" / self.bucket).resolve() + self.url = self.bucket_path.as_uri() + self.results_cache_path = ( + self.fixture_dir / "simulated" / "af1_results_cache" + ).resolve() - # Clear out the fixture directory. - shutil.rmtree(self.path, ignore_errors=True) + # Clear out the fixture directories. + shutil.rmtree(self.bucket_path, ignore_errors=True) + shutil.rmtree(self.results_cache_path, ignore_errors=True) # Ensure the fixture directory exists. - self.path.mkdir(parents=True, exist_ok=True) + self.bucket_path.mkdir(parents=True, exist_ok=True) # Create fixture data. self.releases = ("1.0",) @@ -558,6 +846,10 @@ def __init__(self, fixture_dir): self.init_genome_sequence() self.init_genome_features() self.init_metadata() + self.init_snp_sites() + self.init_site_filters() + self.init_snp_genotypes() + self.init_site_annotations() def init_config(self): self.config = { @@ -575,7 +867,7 @@ def init_config(self): "SITE_MASK_IDS": ["funestus"], "PHASING_ANALYSIS_IDS": ["funestus"], } - config_path = self.path / "v1.0-config.json" + config_path = self.bucket_path / "v1.0-config.json" with config_path.open(mode="w") as f: json.dump(self.config, f, indent=4) @@ -587,7 +879,7 @@ def init_public_release_manifest(self): # Here we create a release manifest for an Af1-style # public release. Note this is not the exact same data # as the real release. - release_path = self.path / "v1.0" + release_path = self.bucket_path / "v1.0" release_path.mkdir(parents=True, exist_ok=True) manifest_path = release_path / "manifest.tsv" manifest = pd.DataFrame( @@ -607,16 +899,34 @@ def init_genome_sequence(self): # Here we simulate a reference genome in a simple way # but with much smaller contigs. The data are stored # using zarr as with the real data releases. - path = self.path / self.config["GENOME_ZARR_PATH"] + + # Use real base composition. 
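+        # N.B., unlike the Ag3 reference, there is no lowercase (soft-masked)
+        # sequence here, hence the zero probabilities for the lowercase bases.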
+ base_composition = { + b"a": 0.0, + b"c": 0.0, + b"g": 0.0, + b"t": 0.0, + b"n": 0.0, + b"A": 0.29432128333333335, + b"C": 0.20542065, + b"G": 0.20575796666666665, + b"T": 0.2944834333333333, + b"N": 1.6666666666666667e-05, + } + path = self.bucket_path / self.config["GENOME_ZARR_PATH"] self.genome = simulate_genome( - path=path, contigs=self.contigs, low=100_000, high=300_000 + path=path, + contigs=self.contigs, + low=100_000, + high=200_000, + base_composition=base_composition, ) self.contig_sizes = { contig: self.genome[contig].shape[0] for contig in self.contigs } def init_genome_features(self): - path = self.path / self.config["GENESET_GFF3_PATH"] + path = self.bucket_path / self.config["GENESET_GFF3_PATH"] path.parent.mkdir(parents=True, exist_ok=True) simulator = Gff3Simulator( contig_sizes=self.contig_sizes, @@ -651,7 +961,7 @@ def write_metadata(self, release, release_path, sample_set): / "samples.meta.csv" ) dst_path = ( - self.path + self.bucket_path / release_path / "metadata" / "general" @@ -674,7 +984,7 @@ def write_metadata(self, release, release_path, sample_set): / "samples.cohorts.csv" ) dst_path = ( - self.path + self.bucket_path / release_path / "metadata" / "cohorts_20221129" @@ -697,7 +1007,7 @@ def write_metadata(self, release, release_path, sample_set): / "wgs_snp_data.csv" ) dst_path = ( - self.path + self.bucket_path / release_path / "metadata" / "general" @@ -724,6 +1034,66 @@ def init_metadata(self): sample_set="1231-VO-MULTI-WONDJI-VMF00043", ) + def init_snp_sites(self): + path = self.bucket_path / "v1.0/snp_genotypes/all/sites/" + self.n_sites = simulate_snp_sites( + path=path, contigs=self.contigs, genome=self.genome + ) + + def init_site_filters(self): + analysis = self.config["DEFAULT_SITE_FILTERS_ANALYSIS"] + + # Simulate the funestus mask. + mask = "funestus" + p_pass = 0.59 + path = self.bucket_path / "v1.0/site_filters" / analysis / mask + simulate_site_filters( + path=path, contigs=self.contigs, p_pass=p_pass, n_sites=self.n_sites + ) + + def init_snp_genotypes(self): + # Iterate over releases. + for release, manifest in self.release_manifests.items(): + # Determine release path. + release_path = f"v{release}" + + # Iterate over sample sets in the release. + for rec in manifest.itertuples(): + sample_set = rec.sample_set + metadata_path = ( + self.bucket_path + / release_path + / "metadata" + / "general" + / sample_set + / "samples.meta.csv" + ) + + # Create zarr hierarchy. + zarr_path = ( + self.bucket_path + / release_path + / "snp_genotypes" + / "all" + / sample_set + ) + + # Simulate SNP genotype data. 
+ p_allele = np.array([0.981, 0.006, 0.008, 0.005]) + p_missing = np.array([0.95, 0.05]) + simulate_snp_genotypes( + zarr_path=zarr_path, + metadata_path=metadata_path, + contigs=self.contigs, + n_sites=self.n_sites, + p_allele=p_allele, + p_missing=p_missing, + ) + + def init_site_annotations(self): + path = self.bucket_path / self.config["SITE_ANNOTATIONS_ZARR_PATH"] + simulate_site_annotations(path=path, genome=self.genome) + # For the following data fixtures we will use the "session" scope # so that the fixture data will be created only once per test diff --git a/tests/anoph/test_base.py b/tests/anoph/test_base.py index 634c84f08..9e39729ee 100644 --- a/tests/anoph/test_base.py +++ b/tests/anoph/test_base.py @@ -102,6 +102,9 @@ def test_sample_sets_release(fixture, api): assert_frame_equal(df_ss[["sample_set", "sample_count"]], expected) assert (df_ss["release"] == release).all() + with pytest.raises(TypeError): + api.sample_sets(release=3.1) # type: ignore + @parametrize_with_cases("fixture,api", cases=".") def test_lookup_release(fixture, api): @@ -148,5 +151,3 @@ def test_prep_sample_sets_param(ag3_sim_api: AnophelesBase): ] with pytest.raises(ValueError): ag3_sim_api._prep_sample_sets_param(sample_sets=["AG1000G-AO", "foobar"]) - with pytest.raises(TypeError): - ag3_sim_api._prep_sample_sets_param(sample_sets=3.1) # type: ignore diff --git a/tests/anoph/test_genome_sequence.py b/tests/anoph/test_genome_sequence.py index fbf59c7b1..46b18b3c5 100644 --- a/tests/anoph/test_genome_sequence.py +++ b/tests/anoph/test_genome_sequence.py @@ -57,7 +57,7 @@ def test_open_genome(fixture, api): z = root[contig] assert isinstance(z, zarr.core.Array) assert z.ndim == 1 - assert z.dtype.kind == "S" + assert z.dtype == "S1" @parametrize_with_cases("fixture,api", cases=".") @@ -67,7 +67,7 @@ def test_genome_sequence(fixture, api): seq = api.genome_sequence(region=contig) assert isinstance(seq, da.Array) assert seq.ndim == 1 - assert seq.dtype.kind == "S" + assert seq.dtype == "S1" assert seq.shape[0] == root[contig].shape[0] diff --git a/tests/anoph/test_sample_metadata.py b/tests/anoph/test_sample_metadata.py index 0f384e9c9..0209ee2be 100644 --- a/tests/anoph/test_sample_metadata.py +++ b/tests/anoph/test_sample_metadata.py @@ -3,6 +3,7 @@ import pandas as pd import pytest from pytest_cases import parametrize_with_cases +from typeguard import suppress_type_checks from malariagen_data import af1 as _af1 from malariagen_data import ag3 as _ag3 @@ -434,8 +435,9 @@ def test_sample_metadata__ag3_query(ag3_sim_api): @parametrize_with_cases("fixture,api", cases=".") def test_extra_metadata_errors(fixture, api): # Bad type. 
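+    # N.B., typeguard's runtime checks are suppressed below so that the
+    # TypeError raised by add_extra_metadata() itself gets exercised.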
- with pytest.raises(TypeError): - api.add_extra_metadata(data="foo") + with suppress_type_checks(): + with pytest.raises(TypeError): + api.add_extra_metadata(data="foo") bad_data = pd.DataFrame({"foo": [1, 2, 3], "bar": ["a", "b", "c"]}) diff --git a/tests/anoph/test_snp_data.py b/tests/anoph/test_snp_data.py new file mode 100644 index 000000000..79517a5d7 --- /dev/null +++ b/tests/anoph/test_snp_data.py @@ -0,0 +1,763 @@ +import random +from itertools import product + +import bokeh.model +import dask.array as da +import numpy as np +import pytest +import xarray as xr +import zarr +from numpy.testing import assert_array_equal +from pytest_cases import parametrize_with_cases + +from malariagen_data import af1 as _af1 +from malariagen_data import ag3 as _ag3 +from malariagen_data.anoph.snp_data import AnophelesSnpData + + +@pytest.fixture +def ag3_sim_api(ag3_sim_fixture): + return AnophelesSnpData( + url=ag3_sim_fixture.url, + config_path=_ag3.CONFIG_PATH, + gcs_url=_ag3.GCS_URL, + major_version_number=_ag3.MAJOR_VERSION_NUMBER, + major_version_path=_ag3.MAJOR_VERSION_PATH, + pre=True, + aim_metadata_dtype={ + "aim_species_fraction_arab": "float64", + "aim_species_fraction_colu": "float64", + "aim_species_fraction_colu_no2l": "float64", + "aim_species_gambcolu_arabiensis": object, + "aim_species_gambiae_coluzzii": object, + "aim_species": object, + }, + gff_gene_type="gene", + gff_default_attributes=("ID", "Parent", "Name", "description"), + default_site_mask="gamb_colu_arab", + results_cache=ag3_sim_fixture.results_cache_path.as_posix(), + ) + + +@pytest.fixture +def af1_sim_api(af1_sim_fixture): + return AnophelesSnpData( + url=af1_sim_fixture.url, + config_path=_af1.CONFIG_PATH, + gcs_url=_af1.GCS_URL, + major_version_number=_af1.MAJOR_VERSION_NUMBER, + major_version_path=_af1.MAJOR_VERSION_PATH, + pre=False, + gff_gene_type="protein_coding_gene", + gff_default_attributes=("ID", "Parent", "Note", "description"), + default_site_mask="funestus", + results_cache=af1_sim_fixture.results_cache_path.as_posix(), + ) + + +# N.B., here we use pytest_cases to parametrize tests. Each +# function whose name begins with "case_" defines a set of +# inputs to the test functions. See the documentation for +# pytest_cases for more information, e.g.: +# +# https://smarie.github.io/python-pytest-cases/#basic-usage +# +# We use this approach here because we want to use fixtures +# as test parameters, which is otherwise hard to do with +# pytest alone. 
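+# Each case_* function below returns a (fixture, api) pair, which
+# pytest_cases then injects into any test decorated with
+# @parametrize_with_cases("fixture,api", cases=".").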
+ + +def case_ag3_sim(ag3_sim_fixture, ag3_sim_api): + return ag3_sim_fixture, ag3_sim_api + + +def case_af1_sim(af1_sim_fixture, af1_sim_api): + return af1_sim_fixture, af1_sim_api + + +@parametrize_with_cases("fixture,api", cases=".") +def test_open_snp_sites(fixture, api: AnophelesSnpData): + root = api.open_snp_sites() + assert isinstance(root, zarr.hierarchy.Group) + for contig in api.contigs: + assert contig in root + contig_grp = root[contig] + assert "variants" in contig_grp + variants = contig_grp["variants"] + assert "POS" in variants + assert "REF" in variants + assert "ALT" in variants + + +def test_site_mask_ids_ag3(ag3_sim_api: AnophelesSnpData): + assert ag3_sim_api.site_mask_ids == ("gamb_colu_arab", "gamb_colu", "arab") + + +def test_site_mask_ids_af1(af1_sim_api: AnophelesSnpData): + assert af1_sim_api.site_mask_ids == ("funestus",) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_open_site_filters(fixture, api: AnophelesSnpData): + for mask in api.site_mask_ids: + root = api.open_site_filters(mask=mask) + assert isinstance(root, zarr.hierarchy.Group) + for contig in api.contigs: + assert contig in root + contig_grp = root[contig] + assert "variants" in contig_grp + variants_grp = contig_grp["variants"] + assert "filter_pass" in variants_grp + filter_pass = variants_grp["filter_pass"] + assert filter_pass.dtype == bool + + +@parametrize_with_cases("fixture,api", cases=".") +def test_open_snp_genotypes(fixture, api: AnophelesSnpData): + for rec in api.sample_sets().itertuples(): + sample_set = rec.sample_set + n_samples = rec.sample_count + root = api.open_snp_genotypes(sample_set=sample_set) + assert isinstance(root, zarr.hierarchy.Group) + + # Check samples array. + assert "samples" in root + samples = root["samples"][:] + assert samples.ndim == 1 + assert samples.shape[0] == n_samples + assert samples.dtype.kind == "S" + + for contig in api.contigs: + assert contig in root + contig_grp = root[contig] + + # Check calldata arrays. + n_sites = fixture.n_sites[contig] + assert "calldata" in contig_grp + calldata = contig_grp["calldata"] + assert "GT" in calldata + gt = calldata["GT"] + assert gt.shape == (n_sites, n_samples, 2) + assert gt.dtype == "i1" + assert "GQ" in calldata + gq = calldata["GQ"] + assert gq.shape == (n_sites, n_samples) + assert gq.dtype == "i1" + assert "MQ" in calldata + mq = calldata["MQ"] + assert mq.shape == (n_sites, n_samples) + assert mq.dtype == "f4" + assert "AD" in calldata + ad = calldata["AD"] + assert ad.shape == (n_sites, n_samples, 4) + assert ad.dtype == "i2" + + +def _check_site_filters(api: AnophelesSnpData, mask, region): + filter_pass = api.site_filters(region=region, mask=mask) + assert isinstance(filter_pass, da.Array) + assert filter_pass.ndim == 1 + assert filter_pass.dtype == bool + + +@parametrize_with_cases("fixture,api", cases=".") +def test_site_filters(fixture, api: AnophelesSnpData): + for mask in api.site_mask_ids: + # Test with contig. + contig = random.choice(api.contigs) + _check_site_filters(api, mask=mask, region=contig) + + # Test with region string. + region = f"{contig}:20,000-50,000" + _check_site_filters(api, mask=mask, region=region) + + # Test with genome feature ID. 
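+        # N.B., a genome feature ID is resolved to the region spanned by
+        # that feature, so this also exercises region resolution.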
+ df_gff = api.genome_features(attributes=["ID"]) + region = random.choice(df_gff["ID"].dropna().to_list()) + _check_site_filters(api, mask=mask, region=region) + + +def _check_snp_sites(api: AnophelesSnpData, region): + pos = api.snp_sites(region=region, field="POS") + ref = api.snp_sites(region=region, field="REF") + alt = api.snp_sites(region=region, field="ALT") + assert isinstance(pos, da.Array) + assert pos.ndim == 1 + assert pos.dtype == "i4" + assert isinstance(ref, da.Array) + assert ref.ndim == 1 + assert ref.dtype == "S1" + assert isinstance(alt, da.Array) + assert alt.ndim == 2 + assert alt.dtype == "S1" + assert pos.shape[0] == ref.shape[0] == alt.shape[0] + + # Apply site mask. + mask = random.choice(api.site_mask_ids) + filter_pass = api.site_filters(region=region, mask=mask).compute() + n_pass = np.count_nonzero(filter_pass) + pos_pass = api.snp_sites( + region=region, + field="POS", + site_mask=mask, + ) + assert isinstance(pos_pass, da.Array) + assert pos_pass.ndim == 1 + assert pos_pass.dtype == "i4" + assert pos_pass.shape[0] == n_pass + assert pos_pass.compute().shape == pos_pass.shape + for f in "POS", "REF", "ALT": + d = api.snp_sites( + region=region, + site_mask=mask, + field=f, + ) + assert isinstance(d, da.Array) + assert d.shape[0] == n_pass + assert d.shape == d.compute().shape + + +@parametrize_with_cases("fixture,api", cases=".") +def test_snp_sites(fixture, api: AnophelesSnpData): + # Test with contig. + contig = random.choice(api.contigs) + _check_snp_sites(api=api, region=contig) + + # Test with region string. + region = f"{contig}:20,000-50,000" + _check_snp_sites(api=api, region=region) + + # Test with genome feature ID. + df_gff = api.genome_features(attributes=["ID"]) + region = random.choice(df_gff["ID"].dropna().to_list()) + _check_snp_sites(api=api, region=region) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_open_site_annotations(fixture, api): + root = api.open_site_annotations() + assert isinstance(root, zarr.hierarchy.Group) + for f in ( + "codon_degeneracy", + "codon_nonsyn", + "codon_position", + "seq_cls", + "seq_flen", + "seq_relpos_start", + "seq_relpos_stop", + ): + assert f in root + for contig in api.contigs: + assert contig in root[f] + z = root[f][contig] + # Zarr data should be aligned with genome sequence. + assert z.shape == (len(api.genome_sequence(region=contig)),) + + +def _check_site_annotations(api: AnophelesSnpData, region, site_mask): + ds_snp = api.snp_variants(region=region, site_mask=site_mask) + n_variants = ds_snp.dims["variants"] + ds_ann = api.site_annotations(region=region, site_mask=site_mask) + # Site annotations dataset should be aligned with SNP sites. + assert ds_ann.dims["variants"] == n_variants + assert isinstance(ds_ann, xr.Dataset) + for f in ( + "codon_degeneracy", + "codon_nonsyn", + "codon_position", + "seq_cls", + "seq_flen", + "seq_relpos_start", + "seq_relpos_stop", + ): + d = ds_ann[f] + assert d.ndim == 1 + assert d.dims == ("variants",) + assert d.shape == (n_variants,) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_site_annotations(fixture, api): + # Parametrize region. + contig = random.choice(api.contigs) + df_gff = api.genome_features(attributes=["ID"]) + # Don't need to support multiple regions at this time. + parametrize_region = [ + contig, + f"{contig}:20,000-50,000", + random.choice(df_gff["ID"].dropna().to_list()), + ] + + # Parametrize site_mask. + parametrize_site_mask = (None, random.choice(api.site_mask_ids)) + + # Run tests. 
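+    # N.B., combinations are generated with itertools.product rather than
+    # pytest parametrization, because the values are only known at runtime.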
+ for region, site_mask in product( + parametrize_region, + parametrize_site_mask, + ): + _check_site_annotations( + api=api, + region=region, + site_mask=site_mask, + ) + + +def _check_snp_genotypes(api, sample_sets, region): + df_samples = api.sample_metadata(sample_sets=sample_sets) + + # Check default field (GT). + gt = api.snp_genotypes(region=region, sample_sets=sample_sets) + assert isinstance(gt, da.Array) + assert gt.ndim == 3 + assert gt.dtype == "i1" + assert gt.shape[1] == len(df_samples) + + # Check GT. + x = api.snp_genotypes( + region=region, + sample_sets=sample_sets, + field="GT", + ) + assert isinstance(x, da.Array) + assert x.ndim == 3 + assert x.dtype == "i1" + + # Check GQ. + x = api.snp_genotypes( + region=region, + sample_sets=sample_sets, + field="GQ", + ) + assert isinstance(x, da.Array) + assert x.ndim == 2 + assert x.dtype == "i1" + + # Check MQ. + x = api.snp_genotypes( + region=region, + sample_sets=sample_sets, + field="MQ", + ) + assert isinstance(x, da.Array) + assert x.ndim == 2 + assert x.dtype == "f4" + + # Check AD. + x = api.snp_genotypes( + region=region, + sample_sets=sample_sets, + field="AD", + ) + assert isinstance(x, da.Array) + assert x.ndim == 3 + assert x.dtype == "i2" + + # Check with site mask. + mask = random.choice(api.site_mask_ids) + filter_pass = api.site_filters(region=region, mask=mask).compute() + gt_pass = api.snp_genotypes( + region=region, + sample_sets=sample_sets, + site_mask=mask, + ) + assert isinstance(gt_pass, da.Array) + assert gt_pass.ndim == 3 + assert gt_pass.dtype == "i1" + assert gt_pass.shape[0] == np.count_nonzero(filter_pass) + assert gt_pass.shape[1] == len(df_samples) + assert gt_pass.shape[2] == 2 + + # Check native versus auto chunks. + gt_native = api.snp_genotypes( + region=region, sample_sets=sample_sets, chunks="native" + ) + gt_auto = api.snp_genotypes(region=region, sample_sets=sample_sets, chunks="auto") + assert gt_native.chunks != gt_auto.chunks + + +@parametrize_with_cases("fixture,api", cases=".") +def test_snp_genotypes(fixture, api: AnophelesSnpData): + # Here we manually parametrize sample_sets and region, because + # parameters need to be determined at runtime. + + # Parametrize sample_sets. + all_releases = api.releases + all_sample_sets = api.sample_sets()["sample_set"].to_list() + parametrize_sample_sets = [ + None, + random.choice(all_sample_sets), + np.random.choice(all_sample_sets, size=2, replace=False).tolist(), + random.choice(all_releases), + ] + + # Parametrize region. + contig = random.choice(api.contigs) + df_gff = api.genome_features(attributes=["ID"]) + parametrize_region = [ + contig, + f"{contig}:20,000-50,000", + [f"{contig}:20,000-40,000", f"{contig}:60,000-80,000"], + random.choice(df_gff["ID"].dropna().to_list()), + ] + + # Run tests. + for sample_sets, region in product(parametrize_sample_sets, parametrize_region): + _check_snp_genotypes(api=api, sample_sets=sample_sets, region=region) + + +def _check_snp_calls(api, sample_sets, region, site_mask): + ds = api.snp_calls(region=region, sample_sets=sample_sets, site_mask=site_mask) + assert isinstance(ds, xr.Dataset) + + # Check fields. 
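+    # N.B., one variant_filter_pass_{mask} data variable is expected for
+    # each available site mask, hence the loop below.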
+ expected_data_vars = { + "variant_allele", + "call_genotype", + "call_genotype_mask", + "call_GQ", + "call_AD", + "call_MQ", + } + for m in api.site_mask_ids: + expected_data_vars.add(f"variant_filter_pass_{m}") + assert set(ds.data_vars) == expected_data_vars + + expected_coords = { + "variant_contig", + "variant_position", + "sample_id", + } + assert set(ds.coords) == expected_coords + + # Check dimensions. + assert set(ds.dims) == {"alleles", "ploidy", "samples", "variants"} + + # Check dim lengths. + pos = api.snp_sites(region=region, field="POS", site_mask=site_mask) + n_variants = len(pos) + df_samples = api.sample_metadata(sample_sets=sample_sets) + n_samples = len(df_samples) + assert ds.dims["variants"] == n_variants + assert ds.dims["samples"] == n_samples + assert ds.dims["ploidy"] == 2 + assert ds.dims["alleles"] == 4 + + # Check shapes. + for f in expected_coords | expected_data_vars: + x = ds[f] + assert isinstance(x, xr.DataArray) + assert isinstance(x.data, da.Array) + + if f == "variant_allele": + assert x.ndim == 2 + assert x.shape == (n_variants, 4) + assert x.dims == ("variants", "alleles") + elif f.startswith("variant_"): + assert x.ndim == 1 + assert x.shape == (n_variants,) + assert x.dims == ("variants",) + elif f in {"call_genotype", "call_genotype_mask"}: + assert x.ndim == 3 + assert x.dims == ("variants", "samples", "ploidy") + assert x.shape == (n_variants, n_samples, 2) + elif f == "call_AD": + assert x.ndim == 3 + assert x.dims == ("variants", "samples", "alleles") + assert x.shape == (n_variants, n_samples, 4) + elif f.startswith("call_"): + assert x.ndim == 2 + assert x.dims == ("variants", "samples") + assert x.shape == (n_variants, n_samples) + elif f.startswith("sample_"): + assert x.ndim == 1 + assert x.dims == ("samples",) + assert x.shape == (n_samples,) + + # Check samples. + expected_samples = df_samples["sample_id"].tolist() + assert ds["sample_id"].values.tolist() == expected_samples + + # Check attributes. + assert "contigs" in ds.attrs + assert ds.attrs["contigs"] == api.contigs + + # Check can set up computations. + d1 = ds["variant_position"] > 10_000 + assert isinstance(d1, xr.DataArray) + d2 = ds["call_AD"].sum(axis=(1, 2)) + assert isinstance(d2, xr.DataArray) + + # Check compress bug. + pos = ds["variant_position"].data + assert pos.shape == pos.compute().shape + + +@parametrize_with_cases("fixture,api", cases=".") +def test_snp_calls(fixture, api: AnophelesSnpData): + # Here we manually parametrize sample_sets, region and site_mask, + # because parameters need to be determined at runtime. + + # Parametrize sample_sets. + all_releases = api.releases + all_sample_sets = api.sample_sets()["sample_set"].to_list() + parametrize_sample_sets = [ + None, + random.choice(all_sample_sets), + np.random.choice(all_sample_sets, size=2, replace=False).tolist(), + random.choice(all_releases), + ] + + # Parametrize region. + contig = random.choice(api.contigs) + df_gff = api.genome_features(attributes=["ID"]) + parametrize_region = [ + contig, + f"{contig}:20,000-50,000", + [f"{contig}:20,000-40,000", f"{contig}:60,000-80,000"], + random.choice(df_gff["ID"].dropna().to_list()), + ] + + # Parametrize site_mask. + parametrize_site_mask = (None,) + api.site_mask_ids + + # Run tests. 
+ for sample_sets, region, site_mask in product( + parametrize_sample_sets, parametrize_region, parametrize_site_mask + ): + _check_snp_calls( + api=api, sample_sets=sample_sets, region=region, site_mask=site_mask + ) + + +@pytest.mark.parametrize( + "sample_query", + ["sex_call == 'F'", "taxon == 'coluzzii'", "taxon == 'robot'"], +) +def test_snp_calls_with_sample_query(ag3_sim_api: AnophelesSnpData, sample_query): + df_samples = ag3_sim_api.sample_metadata().query(sample_query) + + if len(df_samples) == 0: + with pytest.raises(ValueError): + ag3_sim_api.snp_calls(region="3L", sample_query=sample_query) + + else: + ds = ag3_sim_api.snp_calls(region="3L", sample_query=sample_query) + assert ds.dims["samples"] == len(df_samples) + assert_array_equal(ds["sample_id"].values, df_samples["sample_id"].values) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_snp_calls_with_min_cohort_size(fixture, api: AnophelesSnpData): + # Randomly fix some input parameters. + all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + contig = random.choice(api.contigs) + region = f"{contig}:20,000-50,000" + + # Test with minimum cohort size. + ds = api.snp_calls( + sample_sets=sample_sets, + region=region, + min_cohort_size=10, + ) + assert isinstance(ds, xr.Dataset) + assert ds.dims["samples"] >= 10 + with pytest.raises(ValueError): + api.snp_calls( + sample_sets=sample_sets, + region=region, + min_cohort_size=1_000, + ) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_snp_calls_with_max_cohort_size(fixture, api: AnophelesSnpData): + # Randomly fix some input parameters. + all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + contig = random.choice(api.contigs) + region = f"{contig}:20,000-50,000" + + # Test with maximum cohort size. + ds = api.snp_calls( + sample_sets=sample_sets, + region=region, + max_cohort_size=15, + ) + assert isinstance(ds, xr.Dataset) + assert ds.dims["samples"] <= 15 + + +@parametrize_with_cases("fixture,api", cases=".") +def test_snp_calls_with_cohort_size(fixture, api: AnophelesSnpData): + # Randomly fix some input parameters. + all_sample_sets = api.sample_sets()["sample_set"].to_list() + sample_sets = random.choice(all_sample_sets) + n_samples = len(api.sample_metadata(sample_sets=sample_sets)) + contig = random.choice(api.contigs) + region = f"{contig}:20,000-50,000" + + # Test with specific cohort size. 
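+    # N.B., if fewer samples are available than the requested cohort size
+    # then a ValueError is expected; otherwise samples are downsampled to
+    # exactly the requested cohort size.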
+ cohort_size = 20 + if n_samples < 20: + with pytest.raises(ValueError): + api.snp_calls( + sample_sets=sample_sets, + region=region, + cohort_size=cohort_size, + ) + else: + ds = api.snp_calls( + sample_sets=sample_sets, + region=region, + cohort_size=cohort_size, + ) + assert isinstance(ds, xr.Dataset) + assert ds.dims["samples"] == 20 + + +@pytest.mark.parametrize( + "site_class", + [ + "CDS_DEG_4", + "CDS_DEG_2_SIMPLE", + "CDS_DEG_0", + "INTRON_SHORT", + "INTRON_LONG", + "INTRON_SPLICE_5PRIME", + "INTRON_SPLICE_3PRIME", + "UTR_5PRIME", + "UTR_3PRIME", + "INTERGENIC", + ], +) +def test_snp_calls_with_site_class(ag3_sim_api: AnophelesSnpData, site_class): + ds1 = ag3_sim_api.snp_calls(region="3L") + ds2 = ag3_sim_api.snp_calls(region="3L", site_class=site_class) + assert ds2.dims["variants"] < ds1.dims["variants"] + + +def _check_snp_allele_counts(api, region, sample_sets, sample_query, site_mask): + df_samples = api.sample_metadata(sample_sets=sample_sets, sample_query=sample_query) + n_samples = len(df_samples) + + # Run once to compute results. + ac = api.snp_allele_counts( + region=region, + sample_sets=sample_sets, + sample_query=sample_query, + site_mask=site_mask, + ) + assert isinstance(ac, np.ndarray) + pos = api.snp_sites(region=region, field="POS", site_mask=site_mask) + assert ac.shape == (pos.shape[0], 4) + assert np.all(ac >= 0) + an = ac.sum(axis=1) + assert an.max() <= 2 * n_samples + + # Run again to ensure loading from results cache produces the same result. + ac2 = api.snp_allele_counts( + region=region, + sample_sets=sample_sets, + sample_query=sample_query, + site_mask=site_mask, + ) + assert_array_equal(ac, ac2) + + +@parametrize_with_cases("fixture,api", cases=".") +def test_snp_allele_counts(fixture, api): + # Parametrize sample_sets. + all_releases = api.releases + all_sample_sets = api.sample_sets()["sample_set"].to_list() + parametrize_sample_sets = [ + None, + random.choice(all_sample_sets), + np.random.choice(all_sample_sets, size=2, replace=False).tolist(), + random.choice(all_releases), + ] + + # Parametrize region. + contig = random.choice(api.contigs) + df_gff = api.genome_features(attributes=["ID"]) + parametrize_region = [ + contig, + f"{contig}:20,000-50,000", + [f"{contig}:20,000-40,000", f"{contig}:60,000-80,000"], + random.choice(df_gff["ID"].dropna().to_list()), + ] + + # Parametrize site_mask. + parametrize_site_mask = (None, random.choice(api.site_mask_ids)) + + # Parametrize sample_query. + parametrize_sample_query = [None, "sex_call == 'F'"] + + # Run tests. + for sample_sets, region, site_mask, sample_query in product( + parametrize_sample_sets, + parametrize_region, + parametrize_site_mask, + parametrize_sample_query, + ): + _check_snp_allele_counts( + api=api, + sample_sets=sample_sets, + region=region, + site_mask=site_mask, + sample_query=sample_query, + ) + + +def _check_is_accessible(api: AnophelesSnpData, region, mask): + is_accessible = api.is_accessible(region=region, site_mask=mask) + assert isinstance(is_accessible, np.ndarray) + assert is_accessible.ndim == 1 + assert is_accessible.shape[0] == api.genome_sequence(region=region).shape[0] + + +@parametrize_with_cases("fixture,api", cases=".") +def test_is_accessible(fixture, api: AnophelesSnpData): + # Parametrize region. + contig = random.choice(api.contigs) + df_gff = api.genome_features(attributes=["ID"]) + # Don't need to support multiple regions at this time. 
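+    # N.B., is_accessible() returns one boolean per position in the region,
+    # so the result is checked against the genome sequence length.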
+    parametrize_region = [
+        contig,
+        f"{contig}:20,000-50,000",
+        random.choice(df_gff["ID"].dropna().to_list()),
+    ]
+
+    # Parametrize site_mask.
+    parametrize_site_mask = api.site_mask_ids
+
+    # Run tests.
+    for region, site_mask in product(
+        parametrize_region,
+        parametrize_site_mask,
+    ):
+        _check_is_accessible(
+            api=api,
+            region=region,
+            mask=site_mask,
+        )
+
+
+@parametrize_with_cases("fixture,api", cases=".")
+def test_plot_snps(fixture, api: AnophelesSnpData):
+    # Randomly choose parameter values.
+    all_sample_sets = api.sample_sets()["sample_set"].to_list()
+    sample_sets = random.choice(all_sample_sets)
+    contig = random.choice(api.contigs)
+    region = f"{contig}:20,000-50,000"
+    site_mask = random.choice(api.site_mask_ids)
+
+    # Exercise the function.
+    fig = api.plot_snps(
+        region=region,
+        sample_sets=sample_sets,
+        site_mask=site_mask,
+        show=False,
+    )
+    assert isinstance(fig, bokeh.model.Model)
diff --git a/tests/test_af1.py b/tests/test_af1.py
index 8b68ee1ca..44d7c5968 100644
--- a/tests/test_af1.py
+++ b/tests/test_af1.py
@@ -33,315 +33,6 @@ def test_repr():
     assert isinstance(r, str)
 
 
-@pytest.mark.parametrize(
-    "region",
-    ["X", ["2RL:48,714,463-48,715,355", "LOC125762289"]],
-)
-def test_site_filters(region):
-    af1 = setup_af1()
-    filter_pass = af1.site_filters(region=region, mask="funestus")
-    assert isinstance(filter_pass, da.Array)
-    assert filter_pass.ndim == 1
-    assert filter_pass.dtype == bool
-
-
-@pytest.mark.parametrize(
-    "region",
-    ["X", ["2RL:48,714,463-48,715,355", "LOC125762289"]],
-)
-def test_snp_sites(region):
-    af1 = setup_af1()
-
-    pos = af1.snp_sites(region=region, field="POS")
-    ref = af1.snp_sites(region=region, field="REF")
-    alt = af1.snp_sites(region=region, field="ALT")
-    assert isinstance(pos, da.Array)
-    assert pos.ndim == 1
-    assert pos.dtype == "i4"
-    assert isinstance(ref, da.Array)
-    assert ref.ndim == 1
-    assert ref.dtype == "S1"
-    assert isinstance(alt, da.Array)
-    assert alt.ndim == 2
-    assert alt.dtype == "S1"
-    assert pos.shape[0] == ref.shape[0] == alt.shape[0]
-
-    # apply site mask
-    filter_pass = af1.site_filters(region=region, mask="funestus").compute()
-    n_pass = np.count_nonzero(filter_pass)
-    pos_pass = af1.snp_sites(region=region, field="POS", site_mask="funestus")
-    assert isinstance(pos_pass, da.Array)
-    assert pos_pass.ndim == 1
-    assert pos_pass.dtype == "i4"
-    assert pos_pass.shape[0] == n_pass
-    assert pos_pass.compute().shape == pos_pass.shape
-    for f in "POS", "REF", "ALT":
-        d = af1.snp_sites(region=region, site_mask="funestus", field=f)
-        assert isinstance(d, da.Array)
-        assert d.shape[0] == n_pass
-        assert d.shape == d.compute().shape
-
-
-@pytest.mark.parametrize(
-    "sample_sets",
-    [
-        None,
-        "1229-VO-GH-DADZIE-VMF00095",
-        ["1240-VO-CD-KOEKEMOER-VMF00099", "1240-VO-MZ-KOEKEMOER-VMF00101"],
-        "1.0",
-    ],
-)
-@pytest.mark.parametrize(
-    "region",
-    ["X", ["2RL:48,714,463-48,715,355", "LOC125762289"]],
-)
-def test_snp_genotypes(sample_sets, region):
-    af1 = setup_af1()
-
-    df_samples = af1.sample_metadata(sample_sets=sample_sets)
-    gt = af1.snp_genotypes(region=region, sample_sets=sample_sets)
-    assert isinstance(gt, da.Array)
-    assert gt.ndim == 3
-    assert gt.dtype == "i1"
-    assert gt.shape[1] == len(df_samples)
-
-    # specific fields
-    x = af1.snp_genotypes(region=region, sample_sets=sample_sets, field="GT")
-    assert isinstance(x, da.Array)
-    assert x.ndim == 3
-    assert x.dtype == "i1"
-
-    x = af1.snp_genotypes(region=region, sample_sets=sample_sets, field="GQ")
-    assert isinstance(x, da.Array)
-    assert x.ndim == 2
-    assert x.dtype == "int8"
-
-    x = af1.snp_genotypes(region=region, sample_sets=sample_sets, field="MQ")
-    assert isinstance(x, da.Array)
-    assert x.ndim == 2
-    assert x.dtype == "float32"
-
-    x = af1.snp_genotypes(region=region, sample_sets=sample_sets, field="AD")
-    assert isinstance(x, da.Array)
-    assert x.ndim == 3
-    assert x.dtype == "i2"
-
-    # site mask
-    filter_pass = af1.site_filters(region=region, mask="funestus").compute()
-    gt_pass = af1.snp_genotypes(
-        region=region,
-        sample_sets=sample_sets,
-        site_mask="funestus",
-    )
-    assert isinstance(gt_pass, da.Array)
-    assert gt_pass.ndim == 3
-    assert gt_pass.dtype == "i1"
-    assert gt_pass.shape[0] == np.count_nonzero(filter_pass)
-    assert gt_pass.shape[1] == len(df_samples)
-    assert gt_pass.shape[2] == 2
-
-
-@pytest.mark.parametrize(
-    "sample_sets",
-    [
-        None,
-        "1229-VO-GH-DADZIE-VMF00095",
-        ["1230-VO-GA-CF-AYALA-VMF00045", "1231-VO-MULTI-WONDJI-VMF00043"],
-        "1.0",
-    ],
-)
-@pytest.mark.parametrize(
-    "region",
-    ["X"],
-)
-def test_snp_genotypes_chunks(sample_sets, region):
-    af1 = setup_af1()
-    gt_native = af1.snp_genotypes(
-        region=region, sample_sets=sample_sets, chunks="native"
-    )
-    gt_auto = af1.snp_genotypes(region=region, sample_sets=sample_sets, chunks="auto")
-    gt_manual = af1.snp_genotypes(
-        region=region, sample_sets=sample_sets, chunks=(100_000, 10, 2)
-    )
-
-    assert gt_native.chunks != gt_auto.chunks
-    assert gt_auto.chunks != gt_manual.chunks
-    assert gt_manual.chunks != gt_native.chunks
-    assert gt_manual.chunks[0][0] == 100_000
-    assert gt_manual.chunks[1][0] == 10
-    assert gt_manual.chunks[2][0] == 2
-
-
-@pytest.mark.parametrize(
-    "region",
-    ["X", "LOC125762289", "2RL:48714463-48715355"],
-)
-def test_is_accessible(region):
-    af1 = setup_af1()
-    # run a couple of tests
-    is_accessible = af1.is_accessible(region=region, site_mask="funestus")
-    assert isinstance(is_accessible, np.ndarray)
-    assert is_accessible.ndim == 1
-    assert is_accessible.shape[0] == af1.genome_sequence(region).shape[0]
-
-
-@pytest.mark.parametrize("region", ["X", "LOC125762289", "2RL:48714463-48715355"])
-@pytest.mark.parametrize("site_mask", [None, "funestus"])
-def test_site_annotations(region, site_mask):
-    af1 = setup_af1()
-
-    ds_snp = af1.snp_variants(region=region, site_mask=site_mask)
-    n_variants = ds_snp.dims["variants"]
-    ds_ann = af1.site_annotations(region=region, site_mask=site_mask)
-    # site annotations dataset is aligned with SNP sites
-    assert ds_ann.dims["variants"] == n_variants
-    assert isinstance(ds_ann, xr.Dataset)
-    for f in (
-        "codon_degeneracy",
-        "codon_nonsyn",
-        "codon_position",
-        "seq_cls",
-        "seq_flen",
-        "seq_relpos_start",
-        "seq_relpos_stop",
-    ):
-        d = ds_ann[f]
-        assert d.ndim == 1
-        assert d.dims == ("variants",)
-        assert d.shape == (n_variants,)
-
-
-@pytest.mark.parametrize(
-    "sample_sets",
-    [
-        None,
-        "1229-VO-GH-DADZIE-VMF00095",
-        ["1240-VO-CD-KOEKEMOER-VMF00099", "1240-VO-MZ-KOEKEMOER-VMF00101"],
-        "1.0",
-    ],
-)
-@pytest.mark.parametrize(
-    "region",
-    ["X", ["2RL:48,714,463-48,715,355", "LOC125762289"]],
-)
-@pytest.mark.parametrize(
-    "site_mask",
-    [None, "funestus"],
-)
-def test_snp_calls(sample_sets, region, site_mask):
-    af1 = setup_af1()
-
-    ds = af1.snp_calls(region=region, sample_sets=sample_sets, site_mask=site_mask)
-    assert isinstance(ds, xr.Dataset)
-
-    # check fields
-    expected_data_vars = {
-        "variant_allele",
-        "variant_filter_pass_funestus",
-        "call_genotype",
-        "call_genotype_mask",
-        "call_GQ",
-        "call_AD",
-        "call_MQ",
-    }
-    assert set(ds.data_vars) == expected_data_vars
-
-    expected_coords = {
-        "variant_contig",
-        "variant_position",
-        "sample_id",
-    }
-    assert set(ds.coords) == expected_coords
-
-    # check dimensions
-    assert set(ds.dims) == {"alleles", "ploidy", "samples", "variants"}
-
-    # check dim lengths
-    pos = af1.snp_sites(region=region, field="POS", site_mask=site_mask)
-    n_variants = len(pos)
-    df_samples = af1.sample_metadata(sample_sets=sample_sets)
-    n_samples = len(df_samples)
-    assert ds.dims["variants"] == n_variants
-    assert ds.dims["samples"] == n_samples
-    assert ds.dims["ploidy"] == 2
-    assert ds.dims["alleles"] == 4
-
-    # check shapes
-    for f in expected_coords | expected_data_vars:
-        x = ds[f]
-        assert isinstance(x, xr.DataArray)
-        assert isinstance(x.data, da.Array)
-
-        if f == "variant_allele":
-            assert x.ndim == 2
-            assert x.shape == (n_variants, 4)
-            assert x.dims == ("variants", "alleles")
-        elif f.startswith("variant_"):
-            assert x.ndim == 1
-            assert x.shape == (n_variants,)
-            assert x.dims == ("variants",)
-        elif f in {"call_genotype", "call_genotype_mask"}:
-            assert x.ndim == 3
-            assert x.dims == ("variants", "samples", "ploidy")
-            assert x.shape == (n_variants, n_samples, 2)
-        elif f == "call_AD":
-            assert x.ndim == 3
-            assert x.dims == ("variants", "samples", "alleles")
-            assert x.shape == (n_variants, n_samples, 4)
-        elif f.startswith("call_"):
-            assert x.ndim == 2
-            assert x.dims == ("variants", "samples")
-            assert x.shape == (n_variants, n_samples)
-        elif f.startswith("sample_"):
-            assert x.ndim == 1
-            assert x.dims == ("samples",)
-            assert x.shape == (n_samples,)
-
-    # check samples
-    expected_samples = df_samples["sample_id"].tolist()
-    assert ds["sample_id"].values.tolist() == expected_samples
-
-    # check attributes
-    assert "contigs" in ds.attrs
-    assert ds.attrs["contigs"] == ("2RL", "3RL", "X")
-
-    # check can set up computations
-    d1 = ds["variant_position"] > 10_000
-    assert isinstance(d1, xr.DataArray)
-    d2 = ds["call_AD"].sum(axis=(1, 2))
-    assert isinstance(d2, xr.DataArray)
-
-    # check compress bug
-    pos = ds["variant_position"].data
-    assert pos.shape == pos.compute().shape
-
-
-@pytest.mark.parametrize(
-    "sample_query",
-    [None, "taxon == 'funestus'", "taxon == 'robot'"],
-)
-def test_snp_calls__sample_query(sample_query):
-    af1 = setup_af1()
-
-    sample_sets = "1229-VO-GH-DADZIE-VMF00095"
-    df_samples = af1.sample_metadata(sample_sets=sample_sets)
-    if sample_query is not None:
-        df_samples = df_samples.query(sample_query)
-
-    if len(df_samples) == 0:
-        with pytest.raises(ValueError):
-            af1.snp_calls(
-                region="3RL", sample_sets=sample_sets, sample_query=sample_query
-            )
-
-    else:
-        ds = af1.snp_calls(
-            region="3RL", sample_sets=sample_sets, sample_query=sample_query
-        )
-        assert ds.dims["samples"] == len(df_samples)
-        assert_array_equal(ds["sample_id"].values, df_samples["sample_id"].values)
-
-
 # TODO: test_snp_effects() for Af1.0
 # # reverse strand gene
 # gste2 = "LOC125761549"
@@ -1182,51 +873,6 @@ def test_allele_frequencies_advanced__nobs_mode(nobs_mode):
     )
 
 
-# TODO: here _check_gene_cnv_frequencies_advanced
-# TODO: here test_gene_cnv_frequencies_advanced__
-
-
-@pytest.mark.parametrize(
-    "region",
-    [
-        "2RL:1,000,000-2,000,000",
-        "LOC125761549_t5",
-        ["2RL:1,000,000-2,000,000", "3RL:1,000,000-2,000,000"],
-    ],
-)
-@pytest.mark.parametrize(
-    "sample_sets",
-    [
-        "1229-VO-GH-DADZIE-VMF00095",
-        ["1240-VO-CD-KOEKEMOER-VMF00099", "1240-VO-MZ-KOEKEMOER-VMF00101"],
-    ],
-)
-@pytest.mark.parametrize("sample_query", [None, "taxon == 'funestus'"])
-@pytest.mark.parametrize("site_mask", [None, "funestus"]) -def test_snp_allele_counts(region, sample_sets, sample_query, site_mask): - results_cache = "../results_cache" - shutil.rmtree(results_cache, ignore_errors=True) - af1 = setup_af1(results_cache=results_cache) - - ac = af1.snp_allele_counts( - region=region, - sample_sets=sample_sets, - sample_query=sample_query, - site_mask=site_mask, - ) - assert isinstance(ac, np.ndarray) - pos = af1.snp_sites(region=region, field="POS", site_mask=site_mask) - assert ac.shape == (pos.shape[0], 4) - - ac2 = af1.snp_allele_counts( - region=region, - sample_sets=sample_sets, - sample_query=sample_query, - site_mask=site_mask, - ) - assert_array_equal(ac, ac2) - - @pytest.mark.parametrize( "region", [ @@ -1403,7 +1049,7 @@ def test_g123_gwss(): contig = "3RL" site_mask = "funestus" sample_sets = "1.0" - window_size = 1000 + window_size = 10_000 x, g123 = af1.g123_gwss( contig=contig, @@ -1413,7 +1059,7 @@ def test_g123_gwss(): sample_sets=sample_sets, window_size=window_size, min_cohort_size=20, - max_cohort_size=50, + max_cohort_size=30, ) # check dataset @@ -1421,9 +1067,9 @@ def test_g123_gwss(): assert isinstance(g123, np.ndarray) # check dimensions - assert len(x) == 15845 + assert len(x) == 1584 assert len(x) == len(g123) # check some values - assert_allclose(x[0], 185756.747) - assert_allclose(g123[11353], 0.022400000000000007) + assert_allclose(x[0], 253398.2095) + assert_allclose(g123[0], 0.04) diff --git a/tests/test_ag3.py b/tests/test_ag3.py index 442920516..3b79c6931 100644 --- a/tests/test_ag3.py +++ b/tests/test_ag3.py @@ -197,55 +197,6 @@ def test_genome_features_joined_arms_region(region): assert df["contig"].unique() == region.split(":")[0] -@pytest.mark.parametrize("mask", ["gamb_colu_arab", "gamb_colu", "arab"]) -@pytest.mark.parametrize("region", ["3L", ["2R:48,714,463-48,715,355", "AGAP007280"]]) -def test_site_filters(mask, region): - ag3 = setup_ag3() - filter_pass = ag3.site_filters(region=region, mask=mask) - assert isinstance(filter_pass, da.Array) - assert filter_pass.ndim == 1 - assert filter_pass.dtype == bool - - -@pytest.mark.parametrize("chunks", ["auto", "native"]) -@pytest.mark.parametrize("region", ["3L", ["2R:48,714,463-48,715,355", "AGAP007280"]]) -def test_snp_sites(chunks, region): - ag3 = setup_ag3() - - pos = ag3.snp_sites(region=region, field="POS", chunks=chunks) - ref = ag3.snp_sites(region=region, field="REF", chunks=chunks) - alt = ag3.snp_sites(region=region, field="ALT", chunks=chunks) - assert isinstance(pos, da.Array) - assert pos.ndim == 1 - assert pos.dtype == "i4" - assert isinstance(ref, da.Array) - assert ref.ndim == 1 - assert ref.dtype == "S1" - assert isinstance(alt, da.Array) - assert alt.ndim == 2 - assert alt.dtype == "S1" - assert pos.shape[0] == ref.shape[0] == alt.shape[0] - - # apply site mask - filter_pass = ag3.site_filters(region=region, mask="gamb_colu_arab").compute() - n_pass = np.count_nonzero(filter_pass) - pos_pass = ag3.snp_sites( - region=region, field="POS", site_mask="gamb_colu_arab", chunks=chunks - ) - assert isinstance(pos_pass, da.Array) - assert pos_pass.ndim == 1 - assert pos_pass.dtype == "i4" - assert pos_pass.shape[0] == n_pass - assert pos_pass.compute().shape == pos_pass.shape - for f in "POS", "REF", "ALT": - d = ag3.snp_sites( - region=region, site_mask="gamb_colu_arab", field=f, chunks=chunks - ) - assert isinstance(d, da.Array) - assert d.shape[0] == n_pass - assert d.shape == d.compute().shape - - @pytest.mark.parametrize("chrom", ["2RL", "3RL"]) def 
test_snp_sites_for_joined_arms(chrom): ag3 = setup_ag3() @@ -287,90 +238,6 @@ def test_snp_sites_for_joined_arms_region(region, field): assert sites.ndim == 2 -@pytest.mark.parametrize("chunks", ["auto", "native"]) -@pytest.mark.parametrize( - "sample_sets", - [None, "AG1000G-X", ["AG1000G-BF-A", "AG1000G-BF-B"], "3.0"], -) -@pytest.mark.parametrize("region", ["3L", ["2R:48,714,463-48,715,355", "AGAP007280"]]) -def test_snp_genotypes(chunks, sample_sets, region): - ag3 = setup_ag3() - - df_samples = ag3.sample_metadata(sample_sets=sample_sets) - gt = ag3.snp_genotypes(region=region, sample_sets=sample_sets, chunks=chunks) - assert isinstance(gt, da.Array) - assert gt.ndim == 3 - assert gt.dtype == "i1" - assert gt.shape[1] == len(df_samples) - - # specific fields - x = ag3.snp_genotypes( - region=region, sample_sets=sample_sets, field="GT", chunks=chunks - ) - assert isinstance(x, da.Array) - assert x.ndim == 3 - assert x.dtype == "i1" - - x = ag3.snp_genotypes( - region=region, sample_sets=sample_sets, field="GQ", chunks=chunks - ) - assert isinstance(x, da.Array) - assert x.ndim == 2 - assert x.dtype == "i2" # FIXME: non Ag3.0 set (inc. Af1.0) are "int8" instead - - x = ag3.snp_genotypes( - region=region, sample_sets=sample_sets, field="MQ", chunks=chunks - ) - assert isinstance(x, da.Array) - assert x.ndim == 2 - assert x.dtype == "i2" # FIXME: non Ag3.0 set (inc. Af1.0) are "float32" instead - - x = ag3.snp_genotypes( - region=region, sample_sets=sample_sets, field="AD", chunks=chunks - ) - assert isinstance(x, da.Array) - assert x.ndim == 3 - assert x.dtype == "i2" - - # site mask - filter_pass = ag3.site_filters(region=region, mask="gamb_colu_arab").compute() - gt_pass = ag3.snp_genotypes( - region=region, - sample_sets=sample_sets, - site_mask="gamb_colu_arab", - chunks=chunks, - ) - assert isinstance(gt_pass, da.Array) - assert gt_pass.ndim == 3 - assert gt_pass.dtype == "i1" - assert gt_pass.shape[0] == np.count_nonzero(filter_pass) - assert gt_pass.shape[1] == len(df_samples) - assert gt_pass.shape[2] == 2 - - -@pytest.mark.parametrize( - "sample_sets", - [None, "AG1000G-X", ["AG1000G-BF-A", "AG1000G-BF-B"], "3.0"], -) -@pytest.mark.parametrize("region", ["X"]) -def test_snp_genotypes_chunks(sample_sets, region): - ag3 = setup_ag3() - gt_native = ag3.snp_genotypes( - region=region, sample_sets=sample_sets, chunks="native" - ) - gt_auto = ag3.snp_genotypes(region=region, sample_sets=sample_sets, chunks="auto") - gt_manual = ag3.snp_genotypes( - region=region, sample_sets=sample_sets, chunks=(100_000, 10, 2) - ) - - assert gt_native.chunks != gt_auto.chunks - assert gt_auto.chunks != gt_manual.chunks - assert gt_manual.chunks != gt_native.chunks - assert gt_manual.chunks[0][0] == 100_000 - assert gt_manual.chunks[1][0] == 10 - assert gt_manual.chunks[2][0] == 2 - - @pytest.mark.parametrize("chrom", ["2RL", "3RL"]) def test_snp_genotypes_for_joined_arms(chrom): ag3 = setup_ag3() @@ -402,20 +269,6 @@ def test_snp_genotypes_for_joined_arms_region(region): assert sites.shape[0] == gt.shape[0] -@pytest.mark.parametrize( - "region", - ["AGAP007280", "2R:48714463-48715355", "3L"], -) -@pytest.mark.parametrize("mask", ["gamb_colu_arab", "gamb_colu", "arab"]) -def test_is_accessible(region, mask): - ag3 = setup_ag3() - # run a couple of tests - is_accessible = ag3.is_accessible(region=region, site_mask=mask) - assert isinstance(is_accessible, np.ndarray) - assert is_accessible.ndim == 1 - assert is_accessible.shape[0] == ag3.genome_sequence(region).shape[0] - - def test_cross_metadata(): 
     ag3 = setup_ag3()
     df_crosses = ag3.cross_metadata()
@@ -434,154 +287,6 @@ def test_cross_metadata():
     assert df_crosses["sex"].unique().tolist() == expected_sex_values
 
 
-@pytest.mark.parametrize("region", ["3L", "AGAP007280", "2R:48714463-48715355"])
-@pytest.mark.parametrize("site_mask", [None, "gamb_colu_arab"])
-def test_site_annotations(region, site_mask):
-    ag3 = setup_ag3()
-
-    ds_snp = ag3.snp_variants(region=region, site_mask=site_mask)
-    n_variants = ds_snp.dims["variants"]
-    ds_ann = ag3.site_annotations(region=region, site_mask=site_mask)
-    # site annotations dataset is aligned with SNP sites
-    assert ds_ann.dims["variants"] == n_variants
-    assert isinstance(ds_ann, xr.Dataset)
-    for f in (
-        "codon_degeneracy",
-        "codon_nonsyn",
-        "codon_position",
-        "seq_cls",
-        "seq_flen",
-        "seq_relpos_start",
-        "seq_relpos_stop",
-    ):
-        d = ds_ann[f]
-        assert d.ndim == 1
-        assert d.dims == ("variants",)
-        assert d.shape == (n_variants,)
-
-
-@pytest.mark.parametrize(
-    "sample_sets",
-    [None, "AG1000G-X", ["AG1000G-BF-A", "AG1000G-BF-B"], "3.0"],
-)
-@pytest.mark.parametrize("region", ["3L", ["2R:48,714,463-48,715,355", "AGAP007280"]])
-@pytest.mark.parametrize("site_mask", [None, "gamb_colu_arab"])
-def test_snp_calls(sample_sets, region, site_mask):
-    ag3 = setup_ag3()
-
-    ds = ag3.snp_calls(region=region, sample_sets=sample_sets, site_mask=site_mask)
-    assert isinstance(ds, xr.Dataset)
-
-    # check fields
-    expected_data_vars = {
-        "variant_allele",
-        "variant_filter_pass_gamb_colu_arab",
-        "variant_filter_pass_gamb_colu",
-        "variant_filter_pass_arab",
-        "call_genotype",
-        "call_genotype_mask",
-        "call_GQ",
-        "call_AD",
-        "call_MQ",
-    }
-    assert set(ds.data_vars) == expected_data_vars
-
-    expected_coords = {
-        "variant_contig",
-        "variant_position",
-        "sample_id",
-    }
-    assert set(ds.coords) == expected_coords
-
-    # check dimensions
-    assert set(ds.dims) == {"alleles", "ploidy", "samples", "variants"}
-
-    # check dim lengths
-    pos = ag3.snp_sites(region=region, field="POS", site_mask=site_mask)
-    n_variants = len(pos)
-    df_samples = ag3.sample_metadata(sample_sets=sample_sets)
-    n_samples = len(df_samples)
-    assert ds.dims["variants"] == n_variants
-    assert ds.dims["samples"] == n_samples
-    assert ds.dims["ploidy"] == 2
-    assert ds.dims["alleles"] == 4
-
-    # check shapes
-    for f in expected_coords | expected_data_vars:
-        x = ds[f]
-        assert isinstance(x, xr.DataArray)
-        assert isinstance(x.data, da.Array)
-
-        if f == "variant_allele":
-            assert x.ndim == 2
-            assert x.shape == (n_variants, 4)
-            assert x.dims == ("variants", "alleles")
-        elif f.startswith("variant_"):
-            assert x.ndim == 1
-            assert x.shape == (n_variants,)
-            assert x.dims == ("variants",)
-        elif f in {"call_genotype", "call_genotype_mask"}:
-            assert x.ndim == 3
-            assert x.dims == ("variants", "samples", "ploidy")
-            assert x.shape == (n_variants, n_samples, 2)
-        elif f == "call_AD":
-            assert x.ndim == 3
-            assert x.dims == ("variants", "samples", "alleles")
-            assert x.shape == (n_variants, n_samples, 4)
-        elif f.startswith("call_"):
-            assert x.ndim == 2
-            assert x.dims == ("variants", "samples")
-            assert x.shape == (n_variants, n_samples)
-        elif f.startswith("sample_"):
-            assert x.ndim == 1
-            assert x.dims == ("samples",)
-            assert x.shape == (n_samples,)
-
-    # check samples
-    expected_samples = df_samples["sample_id"].tolist()
-    assert ds["sample_id"].values.tolist() == expected_samples
-
-    # check attributes
-    assert "contigs" in ds.attrs
-    assert ds.attrs["contigs"] == ("2R", "2L", "3R", "3L", "X")
-
-    # check can set up computations
-    d1 = ds["variant_position"] > 10_000
-    assert isinstance(d1, xr.DataArray)
-    d2 = ds["call_AD"].sum(axis=(1, 2))
-    assert isinstance(d2, xr.DataArray)
-
-    # check compress bug
-    pos = ds["variant_position"].data
-    assert pos.shape == pos.compute().shape
-
-
-@pytest.mark.parametrize(
-    "sample_query",
-    [None, "taxon == 'coluzzii'", "taxon == 'robot'"],
-)
-def test_snp_calls__sample_query(sample_query):
-    ag3 = setup_ag3()
-
-    sample_sets = "AG1000G-BF-A"
-    df_samples = ag3.sample_metadata(sample_sets=sample_sets)
-    if sample_query is not None:
-        df_samples = df_samples.query(sample_query)
-
-    if len(df_samples) == 0:
-        with pytest.raises(ValueError):
-            ag3.snp_calls(
-                region="3L", sample_sets=sample_sets, sample_query=sample_query
-            )
-
-    else:
-        ds = ag3.snp_calls(
-            region="3L", sample_sets=sample_sets, sample_query=sample_query
-        )
-        assert ds.dims["samples"] == len(df_samples)
-        assert_array_equal(ds["sample_id"].values, df_samples["sample_id"].values)
-
-
 @pytest.mark.parametrize("chrom", ["2RL", "3RL"])
 def test_snp_calls_for_joined_arms(chrom):
     ag3 = setup_ag3()
@@ -949,7 +654,7 @@ def test_cnv_hmm(sample_sets, region):
         )
     else:
         # test part of a contig region
-        region = ag3.resolve_region(region)
+        region = resolve_region(ag3, region)
         variant_contig = ds["variant_contig"].values
         contig_index = ds.attrs["contigs"].index(region.contig)
         assert np.all(variant_contig == contig_index)
@@ -1162,7 +867,7 @@ def test_cnv_coverage_calls(sample_set, analysis, region):
     assert ds.attrs["contigs"] == ("2R", "2L", "3R", "3L", "X")
 
     # check region
-    region = ag3.resolve_region(region)
+    region = resolve_region(ag3, region)
     if (
         isinstance(region, Region)
         and region.start is not None
@@ -2672,43 +2377,6 @@ def test_gene_cnv_frequencies_advanced__dup_samples():
     assert ds.dims == ds_dup.dims
 
 
-@pytest.mark.parametrize(
-    "region",
-    [
-        "2R:1,000,000-2,000,000",
-        "AGAP004707",
-        ["2R:1,000,000-2,000,000", "2L:1,000,000-2,000,000"],
-    ],
-)
-@pytest.mark.parametrize(
-    "sample_sets", ["AG1000G-AO", ["AG1000G-BF-A", "AG1000G-BF-B"]]
-)
-@pytest.mark.parametrize("sample_query", [None, "taxon == 'coluzzii'"])
-@pytest.mark.parametrize("site_mask", [None, "gamb_colu_arab"])
-def test_snp_allele_counts(region, sample_sets, sample_query, site_mask):
-    results_cache = "../results_cache"
-    shutil.rmtree(results_cache, ignore_errors=True)
-    ag3 = setup_ag3(results_cache=results_cache)
-
-    ac = ag3.snp_allele_counts(
-        region=region,
-        sample_sets=sample_sets,
-        sample_query=sample_query,
-        site_mask=site_mask,
-    )
-    assert isinstance(ac, np.ndarray)
-    pos = ag3.snp_sites(region=region, field="POS", site_mask=site_mask)
-    assert ac.shape == (pos.shape[0], 4)
-
-    ac2 = ag3.snp_allele_counts(
-        region=region,
-        sample_sets=sample_sets,
-        sample_query=sample_query,
-        site_mask=site_mask,
-    )
-    assert_array_equal(ac, ac2)
-
-
 @pytest.mark.parametrize(
     "region",
     [
diff --git a/tests/test_anopheles.py b/tests/test_anopheles.py
index 5585e0e92..7ffb37f73 100644
--- a/tests/test_anopheles.py
+++ b/tests/test_anopheles.py
@@ -1,17 +1,16 @@
 import os
 
-import dask.array as da
 import numpy as np
 import pandas as pd
 import pytest
 import xarray as xr
-import zarr
 from numpy.testing import assert_allclose
 from pandas.testing import assert_frame_equal
 
 from malariagen_data import Af1, Ag3, Region
 from malariagen_data.af1 import GCS_URL as AF1_GCS_URL
 from malariagen_data.ag3 import GCS_URL as AG3_GCS_URL
+from malariagen_data.util import resolve_region
 
 expected_cohort_cols = (
     "country_iso",
@@ -184,58 +183,6 @@ def test_sample_metadata(subclass, major_release, sample_set, sample_sets):
     assert_frame_equal(df_default, df_all)
 
 
-@pytest.mark.parametrize(
-    "subclass,mask",
-    [(Ag3, "gamb_colu_arab"), (Ag3, "gamb_colu"), (Ag3, "arab"), (Af1, "funestus")],
-)
-def test_open_site_filters(subclass, mask):
-    # check can open the zarr directly
-    anoph = setup_subclass_cached(subclass)
-    root = anoph.open_site_filters(mask=mask)
-    assert isinstance(root, zarr.hierarchy.Group)
-    for contig in anoph.contigs:
-        assert contig in root
-
-
-@pytest.mark.parametrize("subclass", [Ag3, Af1])
-def test_open_snp_sites(subclass):
-    anoph = setup_subclass_cached(subclass)
-    root = anoph.open_snp_sites()
-    assert isinstance(root, zarr.hierarchy.Group)
-    for contig in anoph.contigs:
-        assert contig in root
-
-
-@pytest.mark.parametrize(
-    "subclass,sample_set", [(Ag3, "AG1000G-AO"), (Af1, "1229-VO-GH-DADZIE-VMF00095")]
-)
-def test_open_snp_genotypes(subclass, sample_set):
-    # check can open the zarr directly
-    anoph = setup_subclass_cached(subclass)
-    root = anoph.open_snp_genotypes(sample_set=sample_set)
-    assert isinstance(root, zarr.hierarchy.Group)
-    for contig in anoph.contigs:
-        assert contig in root
-
-
-@pytest.mark.parametrize("subclass", [Ag3, Af1])
-def test_genome(subclass):
-    anoph = setup_subclass_cached(subclass)
-
-    # test the open_genome() method to access as zarr
-    genome = anoph.open_genome()
-    assert isinstance(genome, zarr.hierarchy.Group)
-    for contig in anoph.contigs:
-        assert contig in genome
-        assert genome[contig].dtype == "S1"
-
-    # test the genome_sequence() method to access sequences
-    for contig in anoph.contigs:
-        seq = anoph.genome_sequence(contig)
-        assert isinstance(seq, da.Array)
-        assert seq.dtype == "S1"
-
-
 @pytest.mark.parametrize("subclass", [Ag3, Af1])
 def test_sample_metadata_dtypes(subclass):
     anoph = setup_subclass_cached(subclass)
@@ -360,37 +307,13 @@ def test_genome_features_region(subclass, region):
     assert len(df) > 0
 
     # check region
-    region = anoph.resolve_region(region)
+    region = resolve_region(anoph, region)
     if isinstance(region, Region):
         assert np.all(df["contig"].values == region.contig)
         if region.start and region.end:
             assert np.all(df.eval(f"start <= {region.end} and end >= {region.start}"))
 
 
-@pytest.mark.parametrize("subclass", [Ag3, Af1])
-def test_open_site_annotations(subclass):
-    anoph = setup_subclass_cached(subclass)
-
-    # test access as zarr
-    root = anoph.open_site_annotations()
-    assert isinstance(root, zarr.hierarchy.Group)
-    for f in (
-        "codon_degeneracy",
-        "codon_nonsyn",
-        "codon_position",
-        "seq_cls",
-        "seq_flen",
-        "seq_relpos_start",
-        "seq_relpos_stop",
-    ):
-        assert f in root
-        for contig in anoph.contigs:
-            assert contig in root[f]
-            z = root[f][contig]
-            # raw zarr data is aligned with genome sequence
-            assert z.shape == (len(anoph.genome_sequence(region=contig)),)
-
-
 @pytest.mark.parametrize(
     "subclass, sample_sets, universal_fields, transcript, site_mask, cohorts_analysis, expected_snp_count",
     [
@@ -785,7 +708,7 @@ def test_haplotypes__cohort_size(subclass, sample_sets, region, analysis, cohort
 def test_h12_calibration(subclass, sample_query, contig, analysis, sample_sets):
     anoph = setup_subclass_cached(subclass)
 
-    window_sizes = [10_000, 20_000]
+    window_sizes = (10_000, 20_000)
     calibration_runs = anoph.h12_calibration(
         contig=contig,
         analysis=analysis,
@@ -816,7 +739,7 @@ def test_h12_calibration(subclass, sample_query, contig, analysis, sample_sets):
 def test_g123_calibration(subclass, sample_query, contig, site_mask, sample_sets):
     anoph = setup_subclass_cached(subclass)
 
-    window_sizes = [10_000, 20_000]
+    window_sizes = (10_000, 20_000)
     calibration_runs = anoph.g123_calibration(
         contig=contig,
         sites=site_mask,